Skip to content

Commit edd876d

Browse files
[LLVM][ComplexDeinterleaving] Update splat identification to include vector ConstantInt/FP.
1 parent 09153db commit edd876d

File tree

2 files changed

+26
-44
lines changed

2 files changed

+26
-44
lines changed

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2005,6 +2005,9 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
20052005
if (isa<ConstantDataVector>(V))
20062006
return true;
20072007

2008+
if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
2009+
return isa<VectorType>(V->getType());
2010+
20082011
VectorType *VTy;
20092012
ArrayRef<int> Mask;
20102013
// Splats are represented differently depending on whether the repeated

llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll

Lines changed: 23 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,54 +1,33 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s -check-prefixes=CHECK,SHUFFLE_SPLAT
3-
; RUN: llc -use-constant-int-for-scalable-splat -use-constant-fp-for-scalable-splat < %s --mattr=+sve -o - | FileCheck %s -check-prefixes=CHECK,CONST_SPLAT
2+
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s
3+
; RUN: llc -use-constant-int-for-scalable-splat -use-constant-fp-for-scalable-splat < %s --mattr=+sve -o - | FileCheck %s
44

55
target triple = "aarch64"
66

77
; a[i] * b[i] * (11.0 + 3.0.i);
88
;
99
define <vscale x 4 x double> @complex_mul_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
10-
; SHUFFLE_SPLAT-LABEL: complex_mul_const:
11-
; SHUFFLE_SPLAT: // %bb.0: // %entry
12-
; SHUFFLE_SPLAT-NEXT: movi v4.2d, #0000000000000000
13-
; SHUFFLE_SPLAT-NEXT: movi v5.2d, #0000000000000000
14-
; SHUFFLE_SPLAT-NEXT: ptrue p0.d
15-
; SHUFFLE_SPLAT-NEXT: fmov z6.d, #3.00000000
16-
; SHUFFLE_SPLAT-NEXT: fmov z7.d, #11.00000000
17-
; SHUFFLE_SPLAT-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0
18-
; SHUFFLE_SPLAT-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0
19-
; SHUFFLE_SPLAT-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90
20-
; SHUFFLE_SPLAT-NEXT: movi v2.2d, #0000000000000000
21-
; SHUFFLE_SPLAT-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90
22-
; SHUFFLE_SPLAT-NEXT: zip2 z1.d, z7.d, z6.d
23-
; SHUFFLE_SPLAT-NEXT: movi v0.2d, #0000000000000000
24-
; SHUFFLE_SPLAT-NEXT: zip1 z3.d, z7.d, z6.d
25-
; SHUFFLE_SPLAT-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0
26-
; SHUFFLE_SPLAT-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0
27-
; SHUFFLE_SPLAT-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90
28-
; SHUFFLE_SPLAT-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90
29-
; SHUFFLE_SPLAT-NEXT: mov z1.d, z2.d
30-
; SHUFFLE_SPLAT-NEXT: ret
31-
;
32-
; CONST_SPLAT-LABEL: complex_mul_const:
33-
; CONST_SPLAT: // %bb.0: // %entry
34-
; CONST_SPLAT-NEXT: uzp1 z4.d, z2.d, z3.d
35-
; CONST_SPLAT-NEXT: uzp2 z5.d, z0.d, z1.d
36-
; CONST_SPLAT-NEXT: uzp2 z2.d, z2.d, z3.d
37-
; CONST_SPLAT-NEXT: uzp1 z0.d, z0.d, z1.d
38-
; CONST_SPLAT-NEXT: ptrue p0.d
39-
; CONST_SPLAT-NEXT: fmul z3.d, z4.d, z5.d
40-
; CONST_SPLAT-NEXT: fmul z1.d, z2.d, z5.d
41-
; CONST_SPLAT-NEXT: fmov z5.d, #11.00000000
42-
; CONST_SPLAT-NEXT: fmad z2.d, p0/m, z0.d, z3.d
43-
; CONST_SPLAT-NEXT: fmov z3.d, #3.00000000
44-
; CONST_SPLAT-NEXT: fnmsb z0.d, p0/m, z4.d, z1.d
45-
; CONST_SPLAT-NEXT: fmul z1.d, z2.d, z5.d
46-
; CONST_SPLAT-NEXT: fmul z2.d, z2.d, z3.d
47-
; CONST_SPLAT-NEXT: fmla z1.d, p0/m, z0.d, z3.d
48-
; CONST_SPLAT-NEXT: fnmls z2.d, p0/m, z0.d, z5.d
49-
; CONST_SPLAT-NEXT: zip1 z0.d, z2.d, z1.d
50-
; CONST_SPLAT-NEXT: zip2 z1.d, z2.d, z1.d
51-
; CONST_SPLAT-NEXT: ret
10+
; CHECK-LABEL: complex_mul_const:
11+
; CHECK: // %bb.0: // %entry
12+
; CHECK-NEXT: movi v4.2d, #0000000000000000
13+
; CHECK-NEXT: movi v5.2d, #0000000000000000
14+
; CHECK-NEXT: ptrue p0.d
15+
; CHECK-NEXT: fmov z6.d, #3.00000000
16+
; CHECK-NEXT: fmov z7.d, #11.00000000
17+
; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0
18+
; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0
19+
; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90
20+
; CHECK-NEXT: movi v2.2d, #0000000000000000
21+
; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90
22+
; CHECK-NEXT: zip2 z1.d, z7.d, z6.d
23+
; CHECK-NEXT: movi v0.2d, #0000000000000000
24+
; CHECK-NEXT: zip1 z3.d, z7.d, z6.d
25+
; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0
26+
; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0
27+
; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90
28+
; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90
29+
; CHECK-NEXT: mov z1.d, z2.d
30+
; CHECK-NEXT: ret
5231
entry:
5332
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
5433
%0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0

0 commit comments

Comments (0)