Skip to content

Commit 111df10

Browse files
author
Evandro Menezes
committed
[AArch64] Update for Exynos

Fix the forwarding of multiplication results for Exynos M4.

llvm-svn: 359834
1 parent 47d8865 commit 111df10

File tree

3 files changed: 18 additions (+18) and 82 deletions (-82).

llvm/lib/Target/AArch64/AArch64SchedExynosM4.td

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,6 @@ def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF,
238238
M4UnitS0]> { let Latency = 5;
239239
let NumMicroOps = 2; }
240240
def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
241-
def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
242241
def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
243242
M4UnitNMSC]> { let Latency = 5;
244243
let NumMicroOps = 2; }
@@ -479,16 +478,15 @@ def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
479478
SchedVar<NoSchedPred, [M4WriteZ0]>]>;
480479
def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
481480
SchedVar<NoSchedPred, [M4WriteNALU1]>]>;
482-
def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
483-
SchedVar<NoSchedPred, [M4WriteNMUL3]>]>;
484481

485482
// Fast forwarding.
486483
def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
487484
def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4,
488485
M4WriteFMAC4H,
489486
M4WriteFMAC5]>;
490487
def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
491-
def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
488+
def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>;
489+
492490

493491
//===----------------------------------------------------------------------===//
494492
// Coarse scheduling model.
@@ -661,10 +659,8 @@ def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
661659
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
662660
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
663661
def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
664-
def : InstRW<[M4WriteFMAC4H,
665-
M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
666-
def : InstRW<[M4WriteFMAC4,
667-
M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
662+
def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
663+
def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>;
668664

669665
// FP load instructions.
670666
def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
@@ -735,14 +731,20 @@ def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
735731
def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
736732
def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
737733
def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
738-
def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
739734
def : InstRW<[M4WriteNMUL3,
740735
M4ReadNMULM1], (instregex "^ML[AS]v")>;
741-
def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
742-
def : InstRW<[M4WriteMULL,
743-
M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
744-
def : InstRW<[M4WriteMULL,
745-
M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
736+
def : InstRW<[M4WriteNMUL3,
737+
M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>;
738+
def : InstRW<[M4WriteNMUL3,
739+
M4ReadNMULM1], (instregex "^SQRDML[AS]H")>;
740+
def : InstRW<[M4WriteNMUL3,
741+
M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
742+
def : InstRW<[M4WriteNMUL3,
743+
M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
744+
def : InstRW<[M4WriteNMUL3,
745+
M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
746+
def : InstRW<[M4WriteNMUL3,
747+
M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
746748
def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
747749
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
748750
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
@@ -807,10 +809,8 @@ def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
807809
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
808810
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
809811
def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
810-
def : InstRW<[M4WriteFMAC4H,
811-
M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
812-
def : InstRW<[M4WriteFMAC4,
813-
M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
812+
def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
813+
def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
814814
def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
815815
def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
816816
def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;

llvm/lib/Target/AArch64/AArch64SchedPredExynos.td

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -102,17 +102,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
102102
// Identify FP instructions.
103103
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
104104

105-
// Identify whether an instruction whose result is a long vector
106-
// operates on the upper half of the input registers.
107-
def ExynosLongVectorUpperFn : TIIPredicate<
108-
"isExynosLongVectorUpper",
109-
MCOpcodeSwitchStatement<
110-
[MCOpcodeSwitchCase<
111-
IsLongVectorUpperOp.ValidOpcodes,
112-
MCReturnStatement<TruePred>>],
113-
MCReturnStatement<FalsePred>>>;
114-
def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
115-
116105
// Identify 128-bit NEON instructions.
117106
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
118107

llvm/lib/Target/AArch64/AArch64SchedPredicates.td

Lines changed: 0 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -267,59 +267,6 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
267267
def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
268268
IsStoreRegOffsetOp.ValidOpcodes)>;
269269

270-
// Identify whether an instruction whose result is a long vector
271-
// operates on the upper half of the input registers.
272-
def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
273-
FCVTNv8i16, FCVTNv4i32,
274-
FCVTXNv4f32,
275-
PMULLv16i8, PMULLv2i64,
276-
RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
277-
RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
278-
RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
279-
SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
280-
SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
281-
SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
282-
SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
283-
SHLLv16i8, SHLLv8i16, SHLLv4i32,
284-
SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
285-
SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
286-
SMLALv8i16_indexed, SMLALv4i32_indexed,
287-
SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
288-
SMLSLv8i16_indexed, SMLSLv4i32_indexed,
289-
SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
290-
SMULLv8i16_indexed, SMULLv4i32_indexed,
291-
SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
292-
SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
293-
SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
294-
SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
295-
SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
296-
SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
297-
SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
298-
SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
299-
SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
300-
SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
301-
SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
302-
SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
303-
SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
304-
SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
305-
SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
306-
UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
307-
UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
308-
UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
309-
UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
310-
UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
311-
UMLALv8i16_indexed, UMLALv4i32_indexed,
312-
UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
313-
UMLSLv8i16_indexed, UMLSLv4i32_indexed,
314-
UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
315-
UMULLv8i16_indexed, UMULLv4i32_indexed,
316-
UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
317-
UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
318-
USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
319-
USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
320-
USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
321-
XTNv16i8, XTNv8i16, XTNv4i32]>;
322-
323270
// Target predicates.
324271

325272
// Identify an instruction that effectively transfers a register to another.

0 commit comments

Comments
 (0)