Skip to content

Commit c69d839

Browse files
committed
[AArch64][MachineScheduler] Set no side effect for movprfx
The movprfx instruction is a vector copy, so it does not access memory. Set hasSideEffects to 0 so that hasUnmodeledSideEffects() no longer returns true for it, which would otherwise prevent the machine scheduler from reordering it with load/store instructions. Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D140680
1 parent 794056e commit c69d839

File tree

7 files changed

+61
-30
lines changed

7 files changed

+61
-30
lines changed

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7916,6 +7916,8 @@ class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
79167916
let Inst{10} = opc{0};
79177917
let Inst{9-5} = Zn;
79187918
let Inst{4-0} = Zd;
7919+
7920+
let hasSideEffects = 0;
79197921
}
79207922

79217923
multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple aarch64 -mcpu=tsv110 -mattr=+sve < %s | FileCheck %s
3+
4+
; Check that the movprfx instruction does not prevent load instructions from
5+
; being scheduled together. As load instructions have long latency, they are
6+
; expected to be preferentially issued.
7+
8+
9+
; NOTE: The unused parameters ensure z0/z1 are free, avoiding an anti-dependence during scheduling.
10+
define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64>* %base) {
11+
; CHECK-LABEL: and_i64_zero:
12+
; CHECK: // %bb.0:
13+
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
14+
; CHECK-NEXT: ptrue p1.d
15+
; CHECK-NEXT: movprfx z0, z2
16+
; CHECK-NEXT: abs z0.d, p1/m, z2.d
17+
; CHECK-NEXT: add z0.d, z0.d, z1.d
18+
; CHECK-NEXT: ret
19+
%data0 = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %c, i1 0)
20+
%data1 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>* %base,
21+
i32 1,
22+
<vscale x 2 x i1> %pg,
23+
<vscale x 2 x i64> undef)
24+
%out = add <vscale x 2 x i64> %data0, %data1
25+
ret <vscale x 2 x i64> %out
26+
}
27+
28+
declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
29+
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -587,14 +587,14 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 {
587587
; VBITS_GE_128-NEXT: ptrue p0.s, vl4
588588
; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
589589
; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
590-
; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
591590
; VBITS_GE_128-NEXT: movprfx z16, z0
592591
; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z4.s
593592
; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s
594-
; VBITS_GE_128-NEXT: movprfx z4, z3
595-
; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
596593
; VBITS_GE_128-NEXT: movprfx z16, z1
597594
; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z5.s
595+
; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
596+
; VBITS_GE_128-NEXT: movprfx z4, z3
597+
; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
598598
; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s
599599
; VBITS_GE_128-NEXT: movprfx z5, z2
600600
; VBITS_GE_128-NEXT: sdiv z5.s, p0/m, z5.s, z7.s
@@ -1407,14 +1407,14 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 {
14071407
; VBITS_GE_128-NEXT: ptrue p0.s, vl4
14081408
; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
14091409
; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
1410-
; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
14111410
; VBITS_GE_128-NEXT: movprfx z16, z0
14121411
; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z4.s
14131412
; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s
1414-
; VBITS_GE_128-NEXT: movprfx z4, z3
1415-
; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s
14161413
; VBITS_GE_128-NEXT: movprfx z16, z1
14171414
; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z5.s
1415+
; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
1416+
; VBITS_GE_128-NEXT: movprfx z4, z3
1417+
; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s
14181418
; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s
14191419
; VBITS_GE_128-NEXT: movprfx z5, z2
14201420
; VBITS_GE_128-NEXT: udiv z5.s, p0/m, z5.s, z7.s

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,8 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) #0 {
578578
; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
579579
; CHECK-NEXT: mov z3.s, z2.s[1]
580580
; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
581-
; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
582581
; CHECK-NEXT: ldp q0, q1, [x0, #64]
582+
; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
583583
; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
584584
; CHECK-NEXT: fmov w10, s5
585585
; CHECK-NEXT: mov z5.s, z5.s[1]
@@ -590,18 +590,18 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) #0 {
590590
; CHECK-NEXT: fmov w8, s4
591591
; CHECK-NEXT: strh w9, [sp, #8]
592592
; CHECK-NEXT: fmov w9, s6
593-
; CHECK-NEXT: strh w10, [sp, #4]
594593
; CHECK-NEXT: mov z4.s, z4.s[1]
594+
; CHECK-NEXT: strh w10, [sp, #4]
595595
; CHECK-NEXT: strh w8, [sp]
596596
; CHECK-NEXT: fmov w8, s3
597-
; CHECK-NEXT: strh w9, [sp, #14]
598597
; CHECK-NEXT: movprfx z3, z7
599598
; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
600-
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
601599
; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
600+
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
601+
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
602602
; CHECK-NEXT: strh w8, [sp, #10]
603603
; CHECK-NEXT: fmov w8, s3
604-
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
604+
; CHECK-NEXT: strh w9, [sp, #14]
605605
; CHECK-NEXT: fmov w9, s5
606606
; CHECK-NEXT: fmov w10, s4
607607
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
@@ -1331,8 +1331,8 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) #0 {
13311331
; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
13321332
; CHECK-NEXT: mov z3.s, z2.s[1]
13331333
; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
1334-
; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
13351334
; CHECK-NEXT: ldp q0, q1, [x0, #64]
1335+
; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
13361336
; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
13371337
; CHECK-NEXT: fmov w10, s5
13381338
; CHECK-NEXT: mov z5.s, z5.s[1]
@@ -1343,18 +1343,18 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) #0 {
13431343
; CHECK-NEXT: fmov w8, s4
13441344
; CHECK-NEXT: strh w9, [sp, #8]
13451345
; CHECK-NEXT: fmov w9, s6
1346-
; CHECK-NEXT: strh w10, [sp, #4]
13471346
; CHECK-NEXT: mov z4.s, z4.s[1]
1347+
; CHECK-NEXT: strh w10, [sp, #4]
13481348
; CHECK-NEXT: strh w8, [sp]
13491349
; CHECK-NEXT: fmov w8, s3
1350-
; CHECK-NEXT: strh w9, [sp, #14]
13511350
; CHECK-NEXT: movprfx z3, z7
13521351
; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
1353-
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
13541352
; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
1353+
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
1354+
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
13551355
; CHECK-NEXT: strh w8, [sp, #10]
13561356
; CHECK-NEXT: fmov w8, s3
1357-
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
1357+
; CHECK-NEXT: strh w9, [sp, #14]
13581358
; CHECK-NEXT: fmov w9, s5
13591359
; CHECK-NEXT: fmov w10, s4
13601360
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -319,14 +319,14 @@ define void @bswap_v8i32(ptr %a) #0 {
319319
; CHECK: // %bb.0:
320320
; CHECK-NEXT: ldp q1, q0, [x0]
321321
; CHECK-NEXT: ptrue p0.s, vl4
322-
; CHECK-NEXT: movprfx z2, z0
323-
; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
324322
; CHECK-NEXT: movprfx z3, z0
325323
; CHECK-NEXT: lsr z3.s, p0/m, z3.s, #8
326-
; CHECK-NEXT: movprfx z4, z1
327-
; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
328324
; CHECK-NEXT: movprfx z5, z1
329325
; CHECK-NEXT: lsr z5.s, p0/m, z5.s, #8
326+
; CHECK-NEXT: movprfx z2, z0
327+
; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
328+
; CHECK-NEXT: movprfx z4, z1
329+
; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
330330
; CHECK-NEXT: and z3.s, z3.s, #0xff00
331331
; CHECK-NEXT: and z5.s, z5.s, #0xff00
332332
; CHECK-NEXT: orr z2.d, z3.d, z2.d
@@ -356,10 +356,10 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 {
356356
; CHECK: // %bb.0:
357357
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
358358
; CHECK-NEXT: ptrue p0.d, vl1
359-
; CHECK-NEXT: movprfx z1, z0
360-
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
361359
; CHECK-NEXT: movprfx z2, z0
362360
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
361+
; CHECK-NEXT: movprfx z1, z0
362+
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
363363
; CHECK-NEXT: movprfx z3, z0
364364
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
365365
; CHECK-NEXT: movprfx z4, z0
@@ -396,10 +396,10 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
396396
; CHECK: // %bb.0:
397397
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
398398
; CHECK-NEXT: ptrue p0.d, vl2
399-
; CHECK-NEXT: movprfx z1, z0
400-
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
401399
; CHECK-NEXT: movprfx z2, z0
402400
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
401+
; CHECK-NEXT: movprfx z1, z0
402+
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
403403
; CHECK-NEXT: movprfx z3, z0
404404
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
405405
; CHECK-NEXT: movprfx z4, z0
@@ -436,23 +436,21 @@ define void @bswap_v4i64(ptr %a) #0 {
436436
; CHECK: // %bb.0:
437437
; CHECK-NEXT: ldp q1, q0, [x0]
438438
; CHECK-NEXT: ptrue p0.d, vl2
439-
; CHECK-NEXT: movprfx z2, z0
440-
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
441439
; CHECK-NEXT: movprfx z3, z0
442440
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #40
443441
; CHECK-NEXT: movprfx z4, z0
444442
; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #24
445443
; CHECK-NEXT: movprfx z5, z0
446444
; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #8
445+
; CHECK-NEXT: movprfx z2, z0
446+
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
447447
; CHECK-NEXT: and z3.d, z3.d, #0xff00
448448
; CHECK-NEXT: and z4.d, z4.d, #0xff0000
449449
; CHECK-NEXT: and z5.d, z5.d, #0xff000000
450450
; CHECK-NEXT: orr z2.d, z3.d, z2.d
451451
; CHECK-NEXT: orr z3.d, z5.d, z4.d
452452
; CHECK-NEXT: mov z6.d, z0.d
453453
; CHECK-NEXT: mov z7.d, z0.d
454-
; CHECK-NEXT: movprfx z16, z0
455-
; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
456454
; CHECK-NEXT: orr z2.d, z3.d, z2.d
457455
; CHECK-NEXT: and z6.d, z6.d, #0xff000000
458456
; CHECK-NEXT: and z7.d, z7.d, #0xff0000
@@ -463,6 +461,8 @@ define void @bswap_v4i64(ptr %a) #0 {
463461
; CHECK-NEXT: orr z3.d, z4.d, z3.d
464462
; CHECK-NEXT: movprfx z4, z1
465463
; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #40
464+
; CHECK-NEXT: movprfx z16, z0
465+
; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
466466
; CHECK-NEXT: and z0.d, z0.d, #0xff00
467467
; CHECK-NEXT: movprfx z5, z1
468468
; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #56

llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3928,7 +3928,7 @@ zip2 z31.s, z31.s, z31.s
39283928
# CHECK-NEXT: 1 4 1.00 mov z21.s, p15/m, #-32768
39293929
# CHECK-NEXT: 1 4 0.50 mov z31.b, p15/m, z31.b
39303930
# CHECK-NEXT: 1 6 1.00 U mov z31.b, p7/m, b31
3931-
# CHECK-NEXT: 1 1 0.17 U movprfx z31, z6
3931+
# CHECK-NEXT: 1 1 0.17 movprfx z31, z6
39323932
# CHECK-NEXT: 1 8 1.00 mov z31.b, p7/m, wsp
39333933
# CHECK-NEXT: 1 6 1.00 mov z31.b, wsp
39343934
# CHECK-NEXT: 1 4 1.00 mov z31.b, z31.b[63]

llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5030,7 +5030,7 @@ zip2 z31.s, z31.s, z31.s
50305030
# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-32768
50315031
# CHECK-NEXT: 1 2 0.50 mov z31.b, p15/m, z31.b
50325032
# CHECK-NEXT: 1 2 0.50 U mov z31.b, p7/m, b31
5033-
# CHECK-NEXT: 1 2 0.50 U movprfx z31, z6
5033+
# CHECK-NEXT: 1 2 0.50 movprfx z31, z6
50345034
# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
50355035
# CHECK-NEXT: 1 3 3.00 mov z31.b, wsp
50365036
# CHECK-NEXT: 1 2 0.50 mov z31.b, z31.b[63]

0 commit comments

Comments
 (0)