Skip to content

[AArch64] Alter latency of FCSEL under Cortex-A510 #80178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SchedA510.td
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,8 @@ def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
// Opcodes whose names contain "SQRT" and end in "32" are assigned the
// CortexA510WriteFSqrtSP write resource; those ending in "64" are assigned
// CortexA510WriteFSqrtDP.
// NOTE(review): both write resources are defined outside this hunk; the
// SP/DP suffixes presumably mean single/double precision - confirm.
def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;

// FCSEL: explicitly assign CortexA510WriteFPALU_F3 to the three listed
// opcodes (FCSELHrrr, FCSELSrrr, FCSELDrrr). They are named individually via
// `instrs` rather than matched by regex.
// NOTE(review): CortexA510WriteFPALU_F3 is defined outside this hunk;
// presumably a 3-cycle FP ALU write - confirm against its definition and the
// Cortex-A510 optimization guide.
def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>;

// 4.15. Advanced SIMD integer instructions
// ASIMD absolute diff
// Matches SABD/UABD opcodes with the v2i32, v4i16 and v8i8 suffixes.
// NOTE(review): the CortexA510Write<3, CortexA510UnitVALU> arguments are
// presumably (latency, execution unit); the template is defined outside this
// hunk - confirm.
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/select_fmf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fmaxnm s1, s0, s3
; CHECK-NEXT: fmaxnm s2, s0, s3
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #4, lt
; CHECK-NEXT: fmov s4, #0.50000000
; CHECK-NEXT: fcsel s2, s1, s0, gt
; CHECK-NEXT: fadd s1, s0, s4
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fcsel s2, s2, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB0_2
Expand Down Expand Up @@ -67,11 +67,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fmaxnm s1, s0, s3
; CHECK-NEXT: fmaxnm s2, s0, s3
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #0, ge
; CHECK-NEXT: fmov s4, #0.50000000
; CHECK-NEXT: fcsel s2, s0, s1, gt
; CHECK-NEXT: fadd s1, s0, s4
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fcsel s2, s0, s2, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB1_2
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/tbl-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -562,25 +562,25 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcmp s3, s1
; CHECK-NEXT: fcsel s4, s1, s3, gt
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: ldp s3, s5, [x8, #8]
; CHECK-NEXT: fcvtzs w11, s2
; CHECK-NEXT: ldp s3, s5, [x8, #8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: fcsel s4, s0, s4, mi
; CHECK-NEXT: fcmp s3, s1
; CHECK-NEXT: strb w11, [x9]
; CHECK-NEXT: fcvtzs w12, s4
; CHECK-NEXT: fcsel s6, s1, s3, gt
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: fcvtzs w12, s4
; CHECK-NEXT: fcsel s3, s0, s6, mi
; CHECK-NEXT: fcmp s5, s1
; CHECK-NEXT: strb w12, [x9, #1]
; CHECK-NEXT: fcsel s6, s1, s5, gt
; CHECK-NEXT: fcmp s5, #0.0
; CHECK-NEXT: fcvtzs w13, s3
; CHECK-NEXT: fcsel s5, s0, s6, mi
; CHECK-NEXT: fcsel s2, s0, s6, mi
; CHECK-NEXT: subs w10, w10, #1
; CHECK-NEXT: strb w13, [x9, #2]
; CHECK-NEXT: fcvtzs w14, s5
; CHECK-NEXT: fcvtzs w14, s2
; CHECK-NEXT: strb w14, [x9, #3]
; CHECK-NEXT: add x9, x9, #4
; CHECK-NEXT: b.ne .LBB3_6
Expand Down
140 changes: 70 additions & 70 deletions llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -242,59 +242,59 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: fcmp s0, s1
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
Expand Down Expand Up @@ -420,66 +420,66 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
; CHECK-NOFP-NEXT: fcmp s0, s17
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: fmov s16, w8
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h4
; CHECK-NOFP-NEXT: fmov s1, w8
; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h5
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s4, h5
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h6
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s4, h6
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h7
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s4, h7
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
Expand Down Expand Up @@ -527,15 +527,16 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
; CHECK-NOFP-NEXT: fcmp s0, s17
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
Expand All @@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
; CHECK-NOFP-NEXT: fmov s16, w8
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h4
; CHECK-NOFP-NEXT: fmov s1, w8
; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h5
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s4, h5
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h6
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s4, h6
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt s3, h7
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s4, h7
; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcmp s3, s2
; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
Expand Down
Loading