Skip to content

Commit a963dbb

Browse files
committed
[SVE ACLE] Extend existing aarch64_sve_mul combines to also act on aarch64_sve_mul_u.
Differential Revision: https://reviews.llvm.org/D152004
1 parent cb9a7c2 commit a963dbb

File tree

2 files changed

+120
-0
lines changed

2 files changed

+120
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,6 +1696,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
16961696
case Intrinsic::aarch64_sve_ptest_last:
16971697
return instCombineSVEPTest(IC, II);
16981698
case Intrinsic::aarch64_sve_mul:
1699+
case Intrinsic::aarch64_sve_mul_u:
16991700
case Intrinsic::aarch64_sve_fmul:
17001701
case Intrinsic::aarch64_sve_fmul_u:
17011702
return instCombineSVEVectorMul(IC, II);
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
2+
3+
target triple = "aarch64-unknown-linux-gnu"
4+
5+
; Idempotent muls -- should compile to just a ret.
6+
define <vscale x 8 x i16> @idempotent_mul_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
7+
; CHECK-LABEL: @idempotent_mul_u_i16(
8+
; CHECK-NEXT: ret <vscale x 8 x i16> [[A:%.*]]
9+
;
10+
%1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
11+
%2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
12+
ret <vscale x 8 x i16> %2
13+
}
14+
15+
; Same identity fold as the i16 case, for the 32-bit element type:
; mul.u(%a, splat(1)) is reduced to just returning %a.
define <vscale x 4 x i32> @idempotent_mul_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
16+
; CHECK-LABEL: @idempotent_mul_u_i32(
17+
; CHECK-NEXT: ret <vscale x 4 x i32> [[A:%.*]]
18+
;
19+
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
20+
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
21+
ret <vscale x 4 x i32> %2
22+
}
23+
24+
; Same identity fold as above, for the 64-bit element type:
; mul.u(%a, splat(1)) is reduced to just returning %a.
define <vscale x 2 x i64> @idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
25+
; CHECK-LABEL: @idempotent_mul_u_i64(
26+
; CHECK-NEXT: ret <vscale x 2 x i64> [[A:%.*]]
27+
;
28+
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
29+
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
30+
ret <vscale x 2 x i64> %2
31+
}
32+
33+
; With the splat-of-1 as the FIRST multiplicand instead of the second, the
; mul.u call survives with the dup constant-folded to a splat shufflevector
; (per the CHECK lines) — NOTE(review): the identity combine apparently only
; matches the splat in the second operand position here.
define <vscale x 2 x i64> @idempotent_mul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
34+
; CHECK-LABEL: @idempotent_mul_u_different_argument_order(
35+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A:%.*]])
36+
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
37+
;
38+
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
39+
; Different argument order to the above tests.
40+
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %1, <vscale x 2 x i64> %a)
41+
ret <vscale x 2 x i64> %2
42+
}
43+
44+
; The splat of 1 comes from a PREDICATED dup using the same predicate as the
; mul.u; the fold still applies and the whole sequence reduces to ret %a
; (per the CHECK lines). Contrast with @non_idempotent_mul_u_with_predicated_dup.
define <vscale x 8 x i16> @idempotent_mul_u_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
45+
; CHECK-LABEL: @idempotent_mul_u_with_predicated_dup(
46+
; CHECK-NEXT: ret <vscale x 8 x i16> [[A:%.*]]
47+
;
48+
%1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 1)
49+
%2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
50+
ret <vscale x 8 x i16> %2
51+
}
52+
53+
; splat(1) * splat(1): both operands are dups of 1, and the result folds to a
; single splat-of-1 constant expression (per the CHECK line).
define <vscale x 8 x i16> @idempotent_mul_u_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
54+
; Edge case -- make sure that the case where we're multiplying two dups
55+
; together is sane.
56+
; CHECK-LABEL: @idempotent_mul_u_two_dups(
57+
; CHECK-NEXT: ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
58+
;
59+
%1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
60+
%2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
61+
%3 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2)
62+
ret <vscale x 8 x i16> %3
63+
}
64+
65+
; Non-idempotent muls -- we don't expect these to be optimised out.
66+
; Multiplier is a splat of 2, not 1, so this is not an identity: the mul.u
; call must survive, with the dup constant-folded into a splat shufflevector
; operand (per the CHECK lines).
define <vscale x 8 x i16> @non_idempotent_mul_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
67+
; CHECK-LABEL: @non_idempotent_mul_u_i16(
68+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
69+
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
70+
;
71+
%1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
72+
%2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
73+
ret <vscale x 8 x i16> %2
74+
}
75+
76+
; 32-bit variant of the splat-of-2 case: not an identity, so the mul.u call
; remains (with the dup folded to a splat constant), per the CHECK lines.
define <vscale x 4 x i32> @non_idempotent_mul_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
77+
; CHECK-LABEL: @non_idempotent_mul_u_i32(
78+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
79+
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
80+
;
81+
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2)
82+
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
83+
ret <vscale x 4 x i32> %2
84+
}
85+
86+
; 64-bit variant of the splat-of-2 case: not an identity, so the mul.u call
; remains (with the dup folded to a splat constant), per the CHECK lines.
define <vscale x 2 x i64> @non_idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
87+
; CHECK-LABEL: @non_idempotent_mul_u_i64(
88+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
89+
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
90+
;
91+
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2)
92+
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
93+
ret <vscale x 2 x i64> %2
94+
}
95+
96+
; The dup of 1 is predicated on %pg1 while the mul.u uses %pg2, so the splat
; is not a full splat-of-1 under the mul's predicate and the fold must NOT
; fire: both calls survive unchanged (per the CHECK lines).
define <vscale x 2 x i64> @non_idempotent_mul_u_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x i64> %a) #0 {
97+
; Different predicates
98+
; CHECK-LABEL: @non_idempotent_mul_u_with_predicated_dup(
99+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG1:%.*]], i64 1)
100+
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG2:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[TMP1]])
101+
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
102+
;
103+
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg1, i64 1)
104+
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg2, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
105+
ret <vscale x 2 x i64> %2
106+
}
107+
108+
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
109+
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
110+
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
111+
112+
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
113+
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
114+
115+
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
116+
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
117+
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
118+
119+
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)