@@ -93,3 +93,209 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
   ret <vscale x 2 x i64> %4
 }
+
+; Test BSL/NBSL/BSL1N/BSL2N code generation for:
+; #define BSL(x,y,z)   ( ((x) & (z)) | ( (y) & ~(z)))
+; #define NBSL(x,y,z)  (~(((x) & (z)) | ( (y) & ~(z))))
+; #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z)))
+; #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z)))
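+;
+; These operations are purely bitwise, so for every element width the checks
+; below expect the single .d-sized form of the corresponding SVE2 instruction.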
+
+define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %6 = and <vscale x 16 x i8> %1, %5
+  %7 = or <vscale x 16 x i8> %4, %6
+  ret <vscale x 16 x i8> %7
+}
+
+define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_nbsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %6 = and <vscale x 16 x i8> %1, %5
+  %7 = or <vscale x 16 x i8> %4, %6
+  %8 = xor <vscale x 16 x i8> %7, splat (i8 -1)
+  ret <vscale x 16 x i8> %8
+}
+
+define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl1n_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 16 x i8> %0, splat (i8 -1)
+  %5 = and <vscale x 16 x i8> %2, %4
+  %6 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %7 = and <vscale x 16 x i8> %1, %6
+  %8 = or <vscale x 16 x i8> %5, %7
+  ret <vscale x 16 x i8> %8
+}
+
+define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl2n_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = or <vscale x 16 x i8> %2, %1
+  %6 = xor <vscale x 16 x i8> %5, splat (i8 -1)
+  %7 = or <vscale x 16 x i8> %4, %6
+  ret <vscale x 16 x i8> %7
+}
+
+define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %6 = and <vscale x 8 x i16> %1, %5
+  %7 = or <vscale x 8 x i16> %4, %6
+  ret <vscale x 8 x i16> %7
+}
+
+define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_nbsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %6 = and <vscale x 8 x i16> %1, %5
+  %7 = or <vscale x 8 x i16> %4, %6
+  %8 = xor <vscale x 8 x i16> %7, splat (i16 -1)
+  ret <vscale x 8 x i16> %8
+}
+
+define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl1n_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 8 x i16> %0, splat (i16 -1)
+  %5 = and <vscale x 8 x i16> %2, %4
+  %6 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %7 = and <vscale x 8 x i16> %1, %6
+  %8 = or <vscale x 8 x i16> %5, %7
+  ret <vscale x 8 x i16> %8
+}
+
+define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl2n_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = or <vscale x 8 x i16> %2, %1
+  %6 = xor <vscale x 8 x i16> %5, splat (i16 -1)
+  %7 = or <vscale x 8 x i16> %4, %6
+  ret <vscale x 8 x i16> %7
+}
+
+define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %6 = and <vscale x 4 x i32> %1, %5
+  %7 = or <vscale x 4 x i32> %4, %6
+  ret <vscale x 4 x i32> %7
+}
+
+define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_nbsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %6 = and <vscale x 4 x i32> %1, %5
+  %7 = or <vscale x 4 x i32> %4, %6
+  %8 = xor <vscale x 4 x i32> %7, splat (i32 -1)
+  ret <vscale x 4 x i32> %8
+}
+
+define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl1n_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 4 x i32> %0, splat (i32 -1)
+  %5 = and <vscale x 4 x i32> %2, %4
+  %6 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %7 = and <vscale x 4 x i32> %1, %6
+  %8 = or <vscale x 4 x i32> %5, %7
+  ret <vscale x 4 x i32> %8
+}
+
+define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl2n_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = or <vscale x 4 x i32> %2, %1
+  %6 = xor <vscale x 4 x i32> %5, splat (i32 -1)
+  %7 = or <vscale x 4 x i32> %4, %6
+  ret <vscale x 4 x i32> %7
+}
+
+define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %6 = and <vscale x 2 x i64> %1, %5
+  %7 = or <vscale x 2 x i64> %4, %6
+  ret <vscale x 2 x i64> %7
+}
+
+define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_nbsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %6 = and <vscale x 2 x i64> %1, %5
+  %7 = or <vscale x 2 x i64> %4, %6
+  %8 = xor <vscale x 2 x i64> %7, splat (i64 -1)
+  ret <vscale x 2 x i64> %8
+}
+
+define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl1n_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 2 x i64> %0, splat (i64 -1)
+  %5 = and <vscale x 2 x i64> %2, %4
+  %6 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %7 = and <vscale x 2 x i64> %1, %6
+  %8 = or <vscale x 2 x i64> %5, %7
+  ret <vscale x 2 x i64> %8
+}
+
+define <vscale x 2 x i64> @codegen_bsl2n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl2n_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = or <vscale x 2 x i64> %2, %1
+  %6 = xor <vscale x 2 x i64> %5, splat (i64 -1)
+  %7 = or <vscale x 2 x i64> %4, %6
+  ret <vscale x 2 x i64> %7
+}