Skip to content

Commit ee4e819

Browse files
[LLVM][AArch64][SVE] Mark DUP immediate instructions with isAsCheapAsAMove. (llvm#133945)
Marking these instructions with isAsCheapAsAMove means the compiler regenerates (rematerializes) the immediate with a fresh DUP rather than copying the result of an existing one, which shortens instruction dependency chains.
1 parent cb0d130 commit ee4e819

16 files changed: 228 additions and 230 deletions.

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2113,6 +2113,7 @@ class sve_int_dup_mask_imm<string asm>
21132113

21142114
let DecoderMethod = "DecodeSVELogicalImmInstruction";
21152115
let hasSideEffects = 0;
2116+
let isAsCheapAsAMove = 1;
21162117
let isReMaterializable = 1;
21172118
let Uses = [VG];
21182119
}
@@ -5118,6 +5119,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
51185119
let Inst{4-0} = Zd;
51195120

51205121
let hasSideEffects = 0;
5122+
let isAsCheapAsAMove = 1;
51215123
let isReMaterializable = 1;
51225124
let Uses = [VG];
51235125
}
@@ -5161,6 +5163,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
51615163
let Inst{4-0} = Zd;
51625164

51635165
let hasSideEffects = 0;
5166+
let isAsCheapAsAMove = 1;
51645167
let isReMaterializable = 1;
51655168
let Uses = [VG];
51665169
}

llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -51,20 +51,20 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
5151
; CHECK-LABEL: mul_add_mull:
5252
; CHECK: // %bb.0: // %entry
5353
; CHECK-NEXT: mov z24.d, #0 // =0x0
54+
; CHECK-NEXT: mov z25.d, #0 // =0x0
55+
; CHECK-NEXT: mov z26.d, #0 // =0x0
56+
; CHECK-NEXT: mov z27.d, #0 // =0x0
5457
; CHECK-NEXT: ptrue p0.d
55-
; CHECK-NEXT: mov z25.d, z24.d
56-
; CHECK-NEXT: mov z26.d, z24.d
57-
; CHECK-NEXT: mov z27.d, z24.d
58-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
59-
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
60-
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
58+
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
59+
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
6160
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
62-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
63-
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
64-
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
61+
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
62+
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
63+
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
6564
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
66-
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
67-
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
65+
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
66+
; CHECK-NEXT: fadd z0.d, z24.d, z27.d
67+
; CHECK-NEXT: fadd z1.d, z25.d, z26.d
6868
; CHECK-NEXT: ret
6969
entry:
7070
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -102,20 +102,20 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
102102
; CHECK-LABEL: mul_sub_mull:
103103
; CHECK: // %bb.0: // %entry
104104
; CHECK-NEXT: mov z24.d, #0 // =0x0
105+
; CHECK-NEXT: mov z25.d, #0 // =0x0
106+
; CHECK-NEXT: mov z26.d, #0 // =0x0
107+
; CHECK-NEXT: mov z27.d, #0 // =0x0
105108
; CHECK-NEXT: ptrue p0.d
106-
; CHECK-NEXT: mov z25.d, z24.d
107-
; CHECK-NEXT: mov z26.d, z24.d
108-
; CHECK-NEXT: mov z27.d, z24.d
109-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
110-
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
111-
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
109+
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
110+
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
112111
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
113-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
114-
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
115-
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
112+
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
113+
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
114+
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
116115
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
117-
; CHECK-NEXT: fsub z1.d, z26.d, z24.d
118-
; CHECK-NEXT: fsub z0.d, z25.d, z27.d
116+
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
117+
; CHECK-NEXT: fsub z0.d, z24.d, z27.d
118+
; CHECK-NEXT: fsub z1.d, z25.d, z26.d
119119
; CHECK-NEXT: ret
120120
entry:
121121
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -153,20 +153,20 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
153153
; CHECK-LABEL: mul_conj_mull:
154154
; CHECK: // %bb.0: // %entry
155155
; CHECK-NEXT: mov z24.d, #0 // =0x0
156+
; CHECK-NEXT: mov z25.d, #0 // =0x0
157+
; CHECK-NEXT: mov z26.d, #0 // =0x0
158+
; CHECK-NEXT: mov z27.d, #0 // =0x0
156159
; CHECK-NEXT: ptrue p0.d
157-
; CHECK-NEXT: mov z25.d, z24.d
158-
; CHECK-NEXT: mov z26.d, z24.d
159-
; CHECK-NEXT: mov z27.d, z24.d
160-
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
161-
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
162-
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
160+
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
161+
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
163162
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #0
164-
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
165-
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
166-
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
163+
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #0
164+
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
165+
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
167166
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #270
168-
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
169-
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
167+
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #270
168+
; CHECK-NEXT: fadd z0.d, z24.d, z27.d
169+
; CHECK-NEXT: fadd z1.d, z25.d, z26.d
170170
; CHECK-NEXT: ret
171171
entry:
172172
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)

llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,18 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
4242
; CHECK-LABEL: mul_add_mull:
4343
; CHECK: // %bb.0: // %entry
4444
; CHECK-NEXT: mov z24.d, #0 // =0x0
45+
; CHECK-NEXT: mov z25.d, #0 // =0x0
4546
; CHECK-NEXT: ptrue p0.d
46-
; CHECK-NEXT: mov z25.d, z24.d
47-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
4847
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
49-
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
48+
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
5049
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
51-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
50+
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
5251
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
53-
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
52+
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
5453
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
55-
; CHECK-NEXT: mov z1.d, z24.d
54+
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
5655
; CHECK-NEXT: mov z0.d, z25.d
56+
; CHECK-NEXT: mov z1.d, z24.d
5757
; CHECK-NEXT: ret
5858
entry:
5959
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -91,18 +91,18 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
9191
; CHECK-LABEL: mul_sub_mull:
9292
; CHECK: // %bb.0: // %entry
9393
; CHECK-NEXT: mov z24.d, #0 // =0x0
94+
; CHECK-NEXT: mov z25.d, #0 // =0x0
9495
; CHECK-NEXT: ptrue p0.d
95-
; CHECK-NEXT: mov z25.d, z24.d
96-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
9796
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
98-
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
97+
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
9998
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
100-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
99+
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
101100
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
102-
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
101+
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
103102
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
104-
; CHECK-NEXT: mov z1.d, z24.d
103+
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
105104
; CHECK-NEXT: mov z0.d, z25.d
105+
; CHECK-NEXT: mov z1.d, z24.d
106106
; CHECK-NEXT: ret
107107
entry:
108108
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -140,18 +140,18 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
140140
; CHECK-LABEL: mul_conj_mull:
141141
; CHECK: // %bb.0: // %entry
142142
; CHECK-NEXT: mov z24.d, #0 // =0x0
143+
; CHECK-NEXT: mov z25.d, #0 // =0x0
143144
; CHECK-NEXT: ptrue p0.d
144-
; CHECK-NEXT: mov z25.d, z24.d
145-
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
146145
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
147-
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
146+
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
148147
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
149-
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
148+
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
150149
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
151-
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
150+
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
152151
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
153-
; CHECK-NEXT: mov z1.d, z24.d
152+
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
154153
; CHECK-NEXT: mov z0.d, z25.d
154+
; CHECK-NEXT: mov z1.d, z24.d
155155
; CHECK-NEXT: ret
156156
entry:
157157
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)

llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,14 @@ define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale
7373
; CHECK-LABEL: complex_mul_v16f16:
7474
; CHECK: // %bb.0: // %entry
7575
; CHECK-NEXT: mov z4.h, #0 // =0x0
76+
; CHECK-NEXT: mov z5.h, #0 // =0x0
7677
; CHECK-NEXT: ptrue p0.h
77-
; CHECK-NEXT: mov z5.d, z4.d
78-
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
7978
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
80-
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
79+
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
8180
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
82-
; CHECK-NEXT: mov z1.d, z4.d
81+
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
8382
; CHECK-NEXT: mov z0.d, z5.d
83+
; CHECK-NEXT: mov z1.d, z4.d
8484
; CHECK-NEXT: ret
8585
entry:
8686
%a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
@@ -104,22 +104,22 @@ define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale
104104
; CHECK-LABEL: complex_mul_v32f16:
105105
; CHECK: // %bb.0: // %entry
106106
; CHECK-NEXT: mov z24.h, #0 // =0x0
107+
; CHECK-NEXT: mov z25.h, #0 // =0x0
108+
; CHECK-NEXT: mov z26.h, #0 // =0x0
109+
; CHECK-NEXT: mov z27.h, #0 // =0x0
107110
; CHECK-NEXT: ptrue p0.h
108-
; CHECK-NEXT: mov z25.d, z24.d
109-
; CHECK-NEXT: mov z26.d, z24.d
110-
; CHECK-NEXT: mov z27.d, z24.d
111-
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
112-
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #0
113-
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #0
111+
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #0
112+
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #0
114113
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #0
115-
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
116-
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #90
117-
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #90
114+
; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #0
115+
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #90
116+
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #90
118117
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #90
119-
; CHECK-NEXT: mov z3.d, z24.d
120-
; CHECK-NEXT: mov z0.d, z25.d
121-
; CHECK-NEXT: mov z1.d, z26.d
118+
; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #90
119+
; CHECK-NEXT: mov z0.d, z24.d
120+
; CHECK-NEXT: mov z1.d, z25.d
122121
; CHECK-NEXT: mov z2.d, z27.d
122+
; CHECK-NEXT: mov z3.d, z26.d
123123
; CHECK-NEXT: ret
124124
entry:
125125
%a.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %a)

llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale
3535
; CHECK-LABEL: complex_mul_v8f32:
3636
; CHECK: // %bb.0: // %entry
3737
; CHECK-NEXT: mov z4.s, #0 // =0x0
38+
; CHECK-NEXT: mov z5.s, #0 // =0x0
3839
; CHECK-NEXT: ptrue p0.s
39-
; CHECK-NEXT: mov z5.d, z4.d
40-
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
4140
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
42-
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
41+
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
4342
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
44-
; CHECK-NEXT: mov z1.d, z4.d
43+
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
4544
; CHECK-NEXT: mov z0.d, z5.d
45+
; CHECK-NEXT: mov z1.d, z4.d
4646
; CHECK-NEXT: ret
4747
entry:
4848
%a.deinterleaved = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %a)
@@ -66,22 +66,22 @@ define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vsca
6666
; CHECK-LABEL: complex_mul_v16f32:
6767
; CHECK: // %bb.0: // %entry
6868
; CHECK-NEXT: mov z24.s, #0 // =0x0
69+
; CHECK-NEXT: mov z25.s, #0 // =0x0
70+
; CHECK-NEXT: mov z26.s, #0 // =0x0
71+
; CHECK-NEXT: mov z27.s, #0 // =0x0
6972
; CHECK-NEXT: ptrue p0.s
70-
; CHECK-NEXT: mov z25.d, z24.d
71-
; CHECK-NEXT: mov z26.d, z24.d
72-
; CHECK-NEXT: mov z27.d, z24.d
73-
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #0
74-
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #0
75-
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #0
73+
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #0
74+
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #0
7675
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #0
77-
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #90
78-
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #90
79-
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #90
76+
; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #0
77+
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #90
78+
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #90
8079
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #90
81-
; CHECK-NEXT: mov z3.d, z24.d
82-
; CHECK-NEXT: mov z0.d, z25.d
83-
; CHECK-NEXT: mov z1.d, z26.d
80+
; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #90
81+
; CHECK-NEXT: mov z0.d, z24.d
82+
; CHECK-NEXT: mov z1.d, z25.d
8483
; CHECK-NEXT: mov z2.d, z27.d
84+
; CHECK-NEXT: mov z3.d, z26.d
8585
; CHECK-NEXT: ret
8686
entry:
8787
%a.deinterleaved = tail call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.vector.deinterleave2.nxv16f32(<vscale x 16 x float> %a)

llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscal
3535
; CHECK-LABEL: complex_mul_v4f64:
3636
; CHECK: // %bb.0: // %entry
3737
; CHECK-NEXT: mov z4.d, #0 // =0x0
38+
; CHECK-NEXT: mov z5.d, #0 // =0x0
3839
; CHECK-NEXT: ptrue p0.d
39-
; CHECK-NEXT: mov z5.d, z4.d
40-
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
4140
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
42-
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
41+
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
4342
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
44-
; CHECK-NEXT: mov z1.d, z4.d
43+
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
4544
; CHECK-NEXT: mov z0.d, z5.d
45+
; CHECK-NEXT: mov z1.d, z4.d
4646
; CHECK-NEXT: ret
4747
entry:
4848
%a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -66,22 +66,22 @@ define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscal
6666
; CHECK-LABEL: complex_mul_v8f64:
6767
; CHECK: // %bb.0: // %entry
6868
; CHECK-NEXT: mov z24.d, #0 // =0x0
69+
; CHECK-NEXT: mov z25.d, #0 // =0x0
70+
; CHECK-NEXT: mov z26.d, #0 // =0x0
71+
; CHECK-NEXT: mov z27.d, #0 // =0x0
6972
; CHECK-NEXT: ptrue p0.d
70-
; CHECK-NEXT: mov z25.d, z24.d
71-
; CHECK-NEXT: mov z26.d, z24.d
72-
; CHECK-NEXT: mov z27.d, z24.d
73-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #0
74-
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #0
75-
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #0
73+
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #0
74+
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #0
7675
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #0
77-
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #90
78-
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #90
79-
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #90
76+
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #0
77+
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #90
78+
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #90
8079
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #90
81-
; CHECK-NEXT: mov z3.d, z24.d
82-
; CHECK-NEXT: mov z0.d, z25.d
83-
; CHECK-NEXT: mov z1.d, z26.d
80+
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #90
81+
; CHECK-NEXT: mov z0.d, z24.d
82+
; CHECK-NEXT: mov z1.d, z25.d
8483
; CHECK-NEXT: mov z2.d, z27.d
84+
; CHECK-NEXT: mov z3.d, z26.d
8585
; CHECK-NEXT: ret
8686
entry:
8787
%a.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %a)

0 commit comments

Comments
 (0)