Skip to content

Commit 8a7b5e0

Browse files
committed
[AArch64] Guard extra uses in mls combine.
This is a small extension to D143143 to ensure that nodes with multiple uses do not get transformed. The tests have also been extended to include more mla cases.
1 parent c3ca290 commit 8a7b5e0

File tree

2 files changed

+118
-13
lines changed

2 files changed

+118
-13
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17723,13 +17723,17 @@ static SDValue performAddCombineForShiftedOperands(SDNode *N,
1772317723
static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG) {
1772417724
if (N->getOpcode() != ISD::SUB)
1772517725
return SDValue();
17726+
1772617727
SDValue Add = N->getOperand(1);
17728+
SDValue X = N->getOperand(0);
1772717729
if (Add.getOpcode() != ISD::ADD)
1772817730
return SDValue();
1772917731

17730-
SDValue X = N->getOperand(0);
17732+
if (!Add.hasOneUse())
17733+
return SDValue();
1773117734
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(X)))
1773217735
return SDValue();
17736+
1773317737
SDValue M1 = Add.getOperand(0);
1773417738
SDValue M2 = Add.getOperand(1);
1773517739
if (M1.getOpcode() != ISD::MUL && M1.getOpcode() != AArch64ISD::SMULL &&

llvm/test/CodeGen/AArch64/reassocmls.ll

Lines changed: 113 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,14 @@ define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) {
6464
define i64 @mla_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
6565
; CHECK-LABEL: mla_i64:
6666
; CHECK: // %bb.0:
67-
; CHECK-NEXT: madd x8, x2, x1, x0
68-
; CHECK-NEXT: madd x0, x4, x3, x8
67+
; CHECK-NEXT: mul x8, x4, x3
68+
; CHECK-NEXT: madd x8, x2, x1, x8
69+
; CHECK-NEXT: add x0, x8, x0
6970
; CHECK-NEXT: ret
7071
%m1 = mul i64 %c, %b
7172
%m2 = mul i64 %e, %d
72-
%s1 = add i64 %m1, %a
73-
%s2 = add i64 %s1, %m2
73+
%s1 = add i64 %m1, %m2
74+
%s2 = add i64 %s1, %a
7475
ret i64 %s2
7576
}
7677

@@ -89,6 +90,89 @@ define i64 @mls_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
8990
ret i64 %s2
9091
}
9192

93+
define i64 @umlsl_i64_muls(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
94+
; CHECK-LABEL: umlsl_i64_muls:
95+
; CHECK: // %bb.0:
96+
; CHECK-NEXT: umull x8, w2, w3
97+
; CHECK-NEXT: umsubl x8, w4, w3, x8
98+
; CHECK-NEXT: umsubl x0, w2, w1, x8
99+
; CHECK-NEXT: ret
100+
%be = zext i32 %b to i64
101+
%ce = zext i32 %c to i64
102+
%de = zext i32 %d to i64
103+
%ee = zext i32 %e to i64
104+
%m1.neg = mul nuw i64 %ce, %be
105+
%m2.neg = mul nuw i64 %ee, %de
106+
%m3 = mul nuw i64 %ce, %de
107+
%reass.add = add i64 %m2.neg, %m1.neg
108+
%s2 = sub i64 %m3, %reass.add
109+
ret i64 %s2
110+
}
111+
112+
define i64 @umlsl_i64_uses(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
113+
; CHECK-LABEL: umlsl_i64_uses:
114+
; CHECK: // %bb.0:
115+
; CHECK-NEXT: umull x8, w4, w3
116+
; CHECK-NEXT: umaddl x8, w2, w1, x8
117+
; CHECK-NEXT: sub x9, x0, x8
118+
; CHECK-NEXT: and x0, x8, x9
119+
; CHECK-NEXT: ret
120+
%be = zext i32 %b to i64
121+
%ce = zext i32 %c to i64
122+
%de = zext i32 %d to i64
123+
%ee = zext i32 %e to i64
124+
%m1.neg = mul nuw i64 %ce, %be
125+
%m2.neg = mul nuw i64 %ee, %de
126+
%reass.add = add i64 %m2.neg, %m1.neg
127+
%s2 = sub i64 %a, %reass.add
128+
%o = and i64 %reass.add, %s2
129+
ret i64 %o
130+
}
131+
132+
define i64 @mla_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
133+
; CHECK-LABEL: mla_i64_C:
134+
; CHECK: // %bb.0:
135+
; CHECK-NEXT: mul x8, x2, x1
136+
; CHECK-NEXT: madd x8, x4, x3, x8
137+
; CHECK-NEXT: add x0, x8, #10
138+
; CHECK-NEXT: ret
139+
%m1.neg = mul i64 %c, %b
140+
%m2.neg = mul i64 %e, %d
141+
%reass.add = add i64 %m2.neg, %m1.neg
142+
%s2 = add i64 10, %reass.add
143+
ret i64 %s2
144+
}
145+
146+
define i64 @mla_i64_uses(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
147+
; CHECK-LABEL: mla_i64_uses:
148+
; CHECK: // %bb.0:
149+
; CHECK-NEXT: mul x8, x2, x1
150+
; CHECK-NEXT: madd x8, x4, x3, x8
151+
; CHECK-NEXT: add x9, x0, x8
152+
; CHECK-NEXT: eor x0, x8, x9
153+
; CHECK-NEXT: ret
154+
%m1.neg = mul i64 %c, %b
155+
%m2.neg = mul i64 %e, %d
156+
%reass.add = add i64 %m2.neg, %m1.neg
157+
%s2 = add i64 %a, %reass.add
158+
%o = xor i64 %reass.add, %s2
159+
ret i64 %o
160+
}
161+
162+
define i64 @mla_i64_mul(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
163+
; CHECK-LABEL: mla_i64_mul:
164+
; CHECK: // %bb.0:
165+
; CHECK-NEXT: mul x8, x2, x1
166+
; CHECK-NEXT: madd x9, x4, x3, x8
167+
; CHECK-NEXT: add x0, x8, x9
168+
; CHECK-NEXT: ret
169+
%m1.neg = mul i64 %c, %b
170+
%m2.neg = mul i64 %e, %d
171+
%reass.add = add i64 %m2.neg, %m1.neg
172+
%s2 = add i64 %m1.neg, %reass.add
173+
ret i64 %s2
174+
}
175+
92176

93177
define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
94178
; CHECK-LABEL: smlsl_v8i16:
@@ -140,13 +224,14 @@ define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16>
140224
define <8 x i16> @mla_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
141225
; CHECK-LABEL: mla_v8i16:
142226
; CHECK: // %bb.0:
143-
; CHECK-NEXT: mla v0.8h, v2.8h, v1.8h
144-
; CHECK-NEXT: mla v0.8h, v4.8h, v3.8h
227+
; CHECK-NEXT: mul v3.8h, v4.8h, v3.8h
228+
; CHECK-NEXT: mla v3.8h, v2.8h, v1.8h
229+
; CHECK-NEXT: add v0.8h, v3.8h, v0.8h
145230
; CHECK-NEXT: ret
146231
%m1 = mul <8 x i16> %c, %b
147232
%m2 = mul <8 x i16> %e, %d
148-
%s1 = add <8 x i16> %m1, %a
149-
%s2 = add <8 x i16> %s1, %m2
233+
%s1 = add <8 x i16> %m1, %m2
234+
%s2 = add <8 x i16> %s1, %a
150235
ret <8 x i16> %s2
151236
}
152237

@@ -164,6 +249,21 @@ define <8 x i16> @mls_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16
164249
ret <8 x i16> %s2
165250
}
166251

252+
define <8 x i16> @mla_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
253+
; CHECK-LABEL: mla_v8i16_C:
254+
; CHECK: // %bb.0:
255+
; CHECK-NEXT: mul v1.8h, v2.8h, v1.8h
256+
; CHECK-NEXT: movi v0.8h, #10
257+
; CHECK-NEXT: mla v1.8h, v4.8h, v3.8h
258+
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
259+
; CHECK-NEXT: ret
260+
%m1.neg = mul <8 x i16> %c, %b
261+
%m2.neg = mul <8 x i16> %e, %d
262+
%reass.add = add <8 x i16> %m2.neg, %m1.neg
263+
%s2 = add <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, %reass.add
264+
ret <8 x i16> %s2
265+
}
266+
167267

168268
define <vscale x 8 x i16> @smlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
169269
; CHECK-LABEL: smlsl_nxv8i16:
@@ -227,12 +327,13 @@ define <vscale x 8 x i16> @mla_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
227327
; CHECK-LABEL: mla_nxv8i16:
228328
; CHECK: // %bb.0:
229329
; CHECK-NEXT: ptrue p0.h
230-
; CHECK-NEXT: mla z0.h, p0/m, z2.h, z1.h
231-
; CHECK-NEXT: mla z0.h, p0/m, z4.h, z3.h
330+
; CHECK-NEXT: mul z1.h, z2.h, z1.h
331+
; CHECK-NEXT: mla z1.h, p0/m, z4.h, z3.h
332+
; CHECK-NEXT: add z0.h, z1.h, z0.h
232333
; CHECK-NEXT: ret
233334
%m1 = mul <vscale x 8 x i16> %c, %b
234335
%m2 = mul <vscale x 8 x i16> %e, %d
235-
%s1 = add <vscale x 8 x i16> %m1, %a
236-
%s2 = add <vscale x 8 x i16> %s1, %m2
336+
%s1 = add <vscale x 8 x i16> %m1, %m2
337+
%s2 = add <vscale x 8 x i16> %s1, %a
237338
ret <vscale x 8 x i16> %s2
238339
}

0 commit comments

Comments
 (0)