@@ -64,13 +64,14 @@ define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) {
64
64
; mla_i64: scalar i64 sum of two products plus %a.
; This hunk changes the add chain from (%m1 + %a) + %m2 to (%m1 + %m2) + %a,
; and the expected output from two chained madd instructions to
; mul + madd + add (the '-' lines are the old form, the '+' lines the new one).
define i64 @mla_i64 (i64 %a , i64 %b , i64 %c , i64 %d , i64 %e ) {
65
65
; CHECK-LABEL: mla_i64:
66
66
; CHECK: // %bb.0:
67
- ; CHECK-NEXT: madd x8, x2, x1, x0
68
- ; CHECK-NEXT: madd x0, x4, x3, x8
67
+ ; CHECK-NEXT: mul x8, x4, x3
68
+ ; CHECK-NEXT: madd x8, x2, x1, x8
69
+ ; CHECK-NEXT: add x0, x8, x0
69
70
; CHECK-NEXT: ret
70
71
%m1 = mul i64 %c , %b
71
72
%m2 = mul i64 %e , %d
72
- %s1 = add i64 %m1 , %a
73
- %s2 = add i64 %s1 , %m2
73
+ %s1 = add i64 %m1 , %m2
74
+ %s2 = add i64 %s1 , %a
74
75
ret i64 %s2
75
76
}
76
77
@@ -89,6 +90,89 @@ define i64 @mls_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
89
90
ret i64 %s2
90
91
}
91
92
93
; umlsl_i64_muls: computes %m3 - (%m2.neg + %m1.neg), where all three values
; are 'mul nuw' products of i32 arguments zero-extended to i64, so the
; minuend is itself a widening multiply rather than a plain value.
; Expected output: one umull for %m3 followed by two umsubl instructions.
; %a is not referenced in the body.
+ define i64 @umlsl_i64_muls (i64 %a , i32 %b , i32 %c , i32 %d , i32 %e ) {
94
+ ; CHECK-LABEL: umlsl_i64_muls:
95
+ ; CHECK: // %bb.0:
96
+ ; CHECK-NEXT: umull x8, w2, w3
97
+ ; CHECK-NEXT: umsubl x8, w4, w3, x8
98
+ ; CHECK-NEXT: umsubl x0, w2, w1, x8
99
+ ; CHECK-NEXT: ret
100
+ %be = zext i32 %b to i64
101
+ %ce = zext i32 %c to i64
102
+ %de = zext i32 %d to i64
103
+ %ee = zext i32 %e to i64
104
+ %m1.neg = mul nuw i64 %ce , %be
105
+ %m2.neg = mul nuw i64 %ee , %de
106
+ %m3 = mul nuw i64 %ce , %de
107
+ %reass.add = add i64 %m2.neg , %m1.neg
108
+ %s2 = sub i64 %m3 , %reass.add
109
+ ret i64 %s2
110
+ }
111
+
112
; umlsl_i64_uses: computes %a - (%m2.neg + %m1.neg) with zero-extended i32
; products, but %reass.add has a second use (the final 'and').
; Expected output: the sum is formed once with umull + umaddl and then
; consumed by a plain sub and the and; no umsubl appears in the checks.
+ define i64 @umlsl_i64_uses (i64 %a , i32 %b , i32 %c , i32 %d , i32 %e ) {
113
+ ; CHECK-LABEL: umlsl_i64_uses:
114
+ ; CHECK: // %bb.0:
115
+ ; CHECK-NEXT: umull x8, w4, w3
116
+ ; CHECK-NEXT: umaddl x8, w2, w1, x8
117
+ ; CHECK-NEXT: sub x9, x0, x8
118
+ ; CHECK-NEXT: and x0, x8, x9
119
+ ; CHECK-NEXT: ret
120
+ %be = zext i32 %b to i64
121
+ %ce = zext i32 %c to i64
122
+ %de = zext i32 %d to i64
123
+ %ee = zext i32 %e to i64
124
+ %m1.neg = mul nuw i64 %ce , %be
125
+ %m2.neg = mul nuw i64 %ee , %de
126
+ %reass.add = add i64 %m2.neg , %m1.neg
127
+ %s2 = sub i64 %a , %reass.add
128
+ %o = and i64 %reass.add , %s2
129
+ ret i64 %o
130
+ }
131
+
132
; mla_i64_C: computes 10 + (%e * %d + %c * %b); the addend is the constant 10
; and %a is not referenced in the body.
; Expected output: mul + madd for the sum of products, then add with #10.
; NOTE(review): the '.neg' value names look carried over from the mls
; variants; nothing is negated or subtracted in this function.
+ define i64 @mla_i64_C (i64 %a , i64 %b , i64 %c , i64 %d , i64 %e ) {
133
+ ; CHECK-LABEL: mla_i64_C:
134
+ ; CHECK: // %bb.0:
135
+ ; CHECK-NEXT: mul x8, x2, x1
136
+ ; CHECK-NEXT: madd x8, x4, x3, x8
137
+ ; CHECK-NEXT: add x0, x8, #10
138
+ ; CHECK-NEXT: ret
139
+ %m1.neg = mul i64 %c , %b
140
+ %m2.neg = mul i64 %e , %d
141
+ %reass.add = add i64 %m2.neg , %m1.neg
142
+ %s2 = add i64 10 , %reass.add
143
+ ret i64 %s2
144
+ }
145
+
146
; mla_i64_uses: computes %a + (%e * %d + %c * %b), where the sum of products
; %reass.add is used twice: by the add with %a and by the final xor.
; Expected output: mul + madd build the sum once in x8, then add and eor
; both read it.
+ define i64 @mla_i64_uses (i64 %a , i64 %b , i64 %c , i64 %d , i64 %e ) {
147
+ ; CHECK-LABEL: mla_i64_uses:
148
+ ; CHECK: // %bb.0:
149
+ ; CHECK-NEXT: mul x8, x2, x1
150
+ ; CHECK-NEXT: madd x8, x4, x3, x8
151
+ ; CHECK-NEXT: add x9, x0, x8
152
+ ; CHECK-NEXT: eor x0, x8, x9
153
+ ; CHECK-NEXT: ret
154
+ %m1.neg = mul i64 %c , %b
155
+ %m2.neg = mul i64 %e , %d
156
+ %reass.add = add i64 %m2.neg , %m1.neg
157
+ %s2 = add i64 %a , %reass.add
158
+ %o = xor i64 %reass.add , %s2
159
+ ret i64 %o
160
+ }
161
+
162
; mla_i64_mul: computes %m1.neg + (%m2.neg + %m1.neg), so the product
; %c * %b is both an operand of the inner sum and the outer addend.
; Expected output: mul keeps that product in x8, madd forms the inner sum
; in x9, and a plain add combines the two. %a is not referenced in the body.
+ define i64 @mla_i64_mul (i64 %a , i64 %b , i64 %c , i64 %d , i64 %e ) {
163
+ ; CHECK-LABEL: mla_i64_mul:
164
+ ; CHECK: // %bb.0:
165
+ ; CHECK-NEXT: mul x8, x2, x1
166
+ ; CHECK-NEXT: madd x9, x4, x3, x8
167
+ ; CHECK-NEXT: add x0, x8, x9
168
+ ; CHECK-NEXT: ret
169
+ %m1.neg = mul i64 %c , %b
170
+ %m2.neg = mul i64 %e , %d
171
+ %reass.add = add i64 %m2.neg , %m1.neg
172
+ %s2 = add i64 %m1.neg , %reass.add
173
+ ret i64 %s2
174
+ }
175
+
92
176
93
177
define <8 x i16 > @smlsl_v8i16 (<8 x i16 > %a , <8 x i8 > %b , <8 x i8 > %c , <8 x i8 > %d , <8 x i8 > %e ) {
94
178
; CHECK-LABEL: smlsl_v8i16:
@@ -140,13 +224,14 @@ define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16>
140
224
; mla_v8i16: <8 x i16> sum of two products plus %a.
; This hunk changes the add chain from (%m1 + %a) + %m2 to (%m1 + %m2) + %a,
; and the expected output from two mla instructions accumulating into v0 to
; mul + mla + add (the '-' lines are the old form, the '+' lines the new one).
define <8 x i16 > @mla_v8i16 (<8 x i16 > %a , <8 x i16 > %b , <8 x i16 > %c , <8 x i16 > %d , <8 x i16 > %e ) {
141
225
; CHECK-LABEL: mla_v8i16:
142
226
; CHECK: // %bb.0:
143
- ; CHECK-NEXT: mla v0.8h, v2.8h, v1.8h
144
- ; CHECK-NEXT: mla v0.8h, v4.8h, v3.8h
227
+ ; CHECK-NEXT: mul v3.8h, v4.8h, v3.8h
228
+ ; CHECK-NEXT: mla v3.8h, v2.8h, v1.8h
229
+ ; CHECK-NEXT: add v0.8h, v3.8h, v0.8h
145
230
; CHECK-NEXT: ret
146
231
%m1 = mul <8 x i16 > %c , %b
147
232
%m2 = mul <8 x i16 > %e , %d
148
- %s1 = add <8 x i16 > %m1 , %a
149
- %s2 = add <8 x i16 > %s1 , %m2
233
+ %s1 = add <8 x i16 > %m1 , %m2
234
+ %s2 = add <8 x i16 > %s1 , %a
150
235
ret <8 x i16 > %s2
151
236
}
152
237
@@ -164,6 +249,21 @@ define <8 x i16> @mls_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16
164
249
ret <8 x i16 > %s2
165
250
}
166
251
252
; mla_v8i16_C: <8 x i16> variant of the constant-addend case; computes
; splat(10) + (%e * %d + %c * %b). %a is not referenced in the body.
; Expected output: mul + mla build the sum of products in v1, movi
; materializes the splat of 10 in v0, and a final add combines them.
; NOTE(review): the '.neg' value names look carried over from the mls
; variants; nothing is negated or subtracted in this function.
+ define <8 x i16 > @mla_v8i16_C (<8 x i16 > %a , <8 x i16 > %b , <8 x i16 > %c , <8 x i16 > %d , <8 x i16 > %e ) {
253
+ ; CHECK-LABEL: mla_v8i16_C:
254
+ ; CHECK: // %bb.0:
255
+ ; CHECK-NEXT: mul v1.8h, v2.8h, v1.8h
256
+ ; CHECK-NEXT: movi v0.8h, #10
257
+ ; CHECK-NEXT: mla v1.8h, v4.8h, v3.8h
258
+ ; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
259
+ ; CHECK-NEXT: ret
260
+ %m1.neg = mul <8 x i16 > %c , %b
261
+ %m2.neg = mul <8 x i16 > %e , %d
262
+ %reass.add = add <8 x i16 > %m2.neg , %m1.neg
263
+ %s2 = add <8 x i16 > <i16 10 , i16 10 , i16 10 , i16 10 , i16 10 , i16 10 , i16 10 , i16 10 >, %reass.add
264
+ ret <8 x i16 > %s2
265
+ }
266
+
167
267
168
268
define <vscale x 8 x i16 > @smlsl_nxv8i16 (<vscale x 8 x i16 > %a , <vscale x 8 x i8 > %b , <vscale x 8 x i8 > %c , <vscale x 8 x i8 > %d , <vscale x 8 x i8 > %e ) {
169
269
; CHECK-LABEL: smlsl_nxv8i16:
@@ -227,12 +327,13 @@ define <vscale x 8 x i16> @mla_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
227
327
; CHECK-LABEL: mla_nxv8i16:
228
328
; CHECK: // %bb.0:
229
329
; CHECK-NEXT: ptrue p0.h
230
- ; CHECK-NEXT: mla z0.h, p0/m, z2.h, z1.h
231
- ; CHECK-NEXT: mla z0.h, p0/m, z4.h, z3.h
330
+ ; CHECK-NEXT: mul z1.h, z2.h, z1.h
331
+ ; CHECK-NEXT: mla z1.h, p0/m, z4.h, z3.h
332
+ ; CHECK-NEXT: add z0.h, z1.h, z0.h
232
333
; CHECK-NEXT: ret
233
334
%m1 = mul <vscale x 8 x i16 > %c , %b
234
335
%m2 = mul <vscale x 8 x i16 > %e , %d
235
- %s1 = add <vscale x 8 x i16 > %m1 , %a
236
- %s2 = add <vscale x 8 x i16 > %s1 , %m2
336
+ %s1 = add <vscale x 8 x i16 > %m1 , %m2
337
+ %s2 = add <vscale x 8 x i16 > %s1 , %a
237
338
ret <vscale x 8 x i16 > %s2
238
339
}
0 commit comments