@@ -255,3 +255,119 @@ define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1)
%4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
ret <4 x i32> %4
}
+
+ define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
+ ; X32-LABEL: knownbits_urem_lshr:
+ ; X32: # BB#0:
+ ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
+ ; X32-NEXT: retl
+ ;
+ ; X64-LABEL: knownbits_urem_lshr:
+ ; X64: # BB#0:
+ ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+ ; X64-NEXT: retq
+ %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
+ %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
+ ret <4 x i32> %2
+ }
+
+ define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
+ ; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
+ ; X32: # BB#0:
+ ; X32-NEXT: pushl %esi
+ ; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,4294967295,4294967295,32767]
+ ; X32-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
+ ; X32-NEXT: vpextrd $1, %xmm0, %eax
+ ; X32-NEXT: vpextrd $1, %xmm1, %ecx
+ ; X32-NEXT: xorl %edx, %edx
+ ; X32-NEXT: divl %ecx
+ ; X32-NEXT: movl %edx, %ecx
+ ; X32-NEXT: vmovd %xmm0, %eax
+ ; X32-NEXT: vmovd %xmm1, %esi
+ ; X32-NEXT: xorl %edx, %edx
+ ; X32-NEXT: divl %esi
+ ; X32-NEXT: vmovd %edx, %xmm2
+ ; X32-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+ ; X32-NEXT: vpextrd $2, %xmm0, %eax
+ ; X32-NEXT: vpextrd $2, %xmm1, %ecx
+ ; X32-NEXT: xorl %edx, %edx
+ ; X32-NEXT: divl %ecx
+ ; X32-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
+ ; X32-NEXT: vpextrd $3, %xmm0, %eax
+ ; X32-NEXT: vpextrd $3, %xmm1, %ecx
+ ; X32-NEXT: xorl %edx, %edx
+ ; X32-NEXT: divl %ecx
+ ; X32-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
+ ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
+ ; X32-NEXT: vpsrld $22, %xmm0, %xmm0
+ ; X32-NEXT: popl %esi
+ ; X32-NEXT: retl
+ ;
+ ; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
+ ; X64: # BB#0:
+ ; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,4294967295,4294967295,32767]
+ ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
+ ; X64-NEXT: vpextrd $1, %xmm0, %eax
+ ; X64-NEXT: vpextrd $1, %xmm1, %ecx
+ ; X64-NEXT: xorl %edx, %edx
+ ; X64-NEXT: divl %ecx
+ ; X64-NEXT: movl %edx, %ecx
+ ; X64-NEXT: vmovd %xmm0, %eax
+ ; X64-NEXT: vmovd %xmm1, %esi
+ ; X64-NEXT: xorl %edx, %edx
+ ; X64-NEXT: divl %esi
+ ; X64-NEXT: vmovd %edx, %xmm2
+ ; X64-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+ ; X64-NEXT: vpextrd $2, %xmm0, %eax
+ ; X64-NEXT: vpextrd $2, %xmm1, %ecx
+ ; X64-NEXT: xorl %edx, %edx
+ ; X64-NEXT: divl %ecx
+ ; X64-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
+ ; X64-NEXT: vpextrd $3, %xmm0, %eax
+ ; X64-NEXT: vpextrd $3, %xmm1, %ecx
+ ; X64-NEXT: xorl %edx, %edx
+ ; X64-NEXT: divl %ecx
+ ; X64-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
+ ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
+ ; X64-NEXT: vpsrld $22, %xmm0, %xmm0
+ ; X64-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
+ %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
+ %3 = urem <4 x i32> %1, %2
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+ %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
+ ret <4 x i32> %5
+ }
+
+ define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
+ ; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
+ ; X32: # BB#0:
+ ; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+ ; X32-NEXT: vpsrad $31, %xmm0, %xmm1
+ ; X32-NEXT: vpsrld $28, %xmm1, %xmm1
+ ; X32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+ ; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
+ ; X32-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+ ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
+ ; X32-NEXT: vpsrld $22, %xmm0, %xmm0
+ ; X32-NEXT: retl
+ ;
+ ; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
+ ; X64: # BB#0:
+ ; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+ ; X64-NEXT: vpsrad $31, %xmm0, %xmm1
+ ; X64-NEXT: vpsrld $28, %xmm1, %xmm1
+ ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+ ; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+ ; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+ ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
+ ; X64-NEXT: vpsrld $22, %xmm0, %xmm0
+ ; X64-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
+ %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+ %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
+ ret <4 x i32> %4
+ }