[LLVM][AArch64][SVE] Mark DUP immediate instructions with isAsCheapAsAMove. #133945

Merged 1 commit on Apr 3, 2025.

llvm/lib/Target/AArch64/SVEInstrFormats.td (3 changes: 3 additions & 0 deletions)
@@ -2117,6 +2117,7 @@ class sve_int_dup_mask_imm<string asm>

let DecoderMethod = "DecodeSVELogicalImmInstruction";
let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let Uses = [VG];
}
@@ -5122,6 +5123,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
let Inst{4-0} = Zd;

let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let Uses = [VG];
}
@@ -5165,6 +5167,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
let Inst{4-0} = Zd;

let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let Uses = [VG];
}
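
For readers unfamiliar with the flag: isAsCheapAsAMove is a generic bit on TableGen's Instruction class (declared in llvm/include/llvm/Target/Target.td). Passes such as MachineCSE and MachineLICM treat flagged instructions as costing no more than a register move, so they are willing to duplicate or rematerialise them rather than keep a single copy live across a long range. For the SVE DUP immediate formats above, the intended effect, visible in the updated CHECK lines below, is that each zeroed accumulator can be materialised with its own mov z<N>, #0 instead of one DUP plus register-to-register copies. A minimal, purely illustrative TableGen sketch of a format opting in follows; the class name is made up, it assumes Target.td has been included, and it is not part of this patch:

// Illustrative only -- not code from this PR.
class cheap_zero_splat<string asm> : Instruction {
  let OutOperandList = (outs);        // operands omitted in this sketch
  let InOperandList  = (ins);
  let AsmString      = asm;
  let hasSideEffects = 0;
  let isReMaterializable = 1;         // the value can simply be recomputed at a use
  let isAsCheapAsAMove   = 1;         // duplicating it costs no more than a MOV
}
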

Changes to an AArch64 SVE codegen test file:
@@ -51,20 +51,20 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: mov z26.d, #0 // =0x0
; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fadd z0.d, z24.d, z27.d
; CHECK-NEXT: fadd z1.d, z25.d, z26.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -102,20 +102,20 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: mov z26.d, #0 // =0x0
; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fsub z1.d, z26.d, z24.d
; CHECK-NEXT: fsub z0.d, z25.d, z27.d
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fsub z0.d, z24.d, z27.d
; CHECK-NEXT: fsub z1.d, z25.d, z26.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -153,20 +153,20 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: mov z26.d, #0 // =0x0
; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: fadd z0.d, z24.d, z27.d
; CHECK-NEXT: fadd z1.d, z25.d, z26.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)

Changes to a second AArch64 SVE codegen test file:
@@ -42,18 +42,18 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -91,18 +91,18 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -140,18 +140,18 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)

Changes to a third AArch64 SVE codegen test file:
@@ -73,14 +73,14 @@ define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale
; CHECK-LABEL: complex_mul_v16f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.h, #0 // =0x0
; CHECK-NEXT: mov z5.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
@@ -104,22 +104,22 @@ define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale
; CHECK-LABEL: complex_mul_v32f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.h, #0 // =0x0
; CHECK-NEXT: mov z25.h, #0 // =0x0
; CHECK-NEXT: mov z26.h, #0 // =0x0
; CHECK-NEXT: mov z27.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #0
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #0
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #0
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #0
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #0
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #90
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #90
; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #0
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #90
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #90
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #90
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %a)

Changes to a fourth AArch64 SVE codegen test file:
@@ -35,14 +35,14 @@ define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale
; CHECK-LABEL: complex_mul_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.s, #0 // =0x0
; CHECK-NEXT: mov z5.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %a)
@@ -66,22 +66,22 @@ define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vsca
; CHECK-LABEL: complex_mul_v16f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.s, #0 // =0x0
; CHECK-NEXT: mov z25.s, #0 // =0x0
; CHECK-NEXT: mov z26.s, #0 // =0x0
; CHECK-NEXT: mov z27.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #0
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #0
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #0
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #0
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #0
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #0
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #90
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #90
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #90
; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #0
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #90
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #90
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #90
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.vector.deinterleave2.nxv16f32(<vscale x 16 x float> %a)

Changes to a fifth AArch64 SVE codegen test file:
@@ -35,14 +35,14 @@ define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscal
; CHECK-LABEL: complex_mul_v4f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.d, #0 // =0x0
; CHECK-NEXT: mov z5.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -66,22 +66,22 @@ define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x doubl
; CHECK-LABEL: complex_mul_v8f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: mov z26.d, #0 // =0x0
; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %a)