@@ -274,64 +274,12 @@ define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
 define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
 ; X32: # BB#0:
-; X32-NEXT: pushl %esi
-; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,4294967295,4294967295,32767]
-; X32-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X32-NEXT: vpextrd $1, %xmm0, %eax
-; X32-NEXT: vpextrd $1, %xmm1, %ecx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vmovd %xmm1, %esi
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %esi
-; X32-NEXT: vmovd %edx, %xmm2
-; X32-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; X32-NEXT: vpextrd $2, %xmm0, %eax
-; X32-NEXT: vpextrd $2, %xmm1, %ecx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %ecx
-; X32-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
-; X32-NEXT: vpextrd $3, %xmm0, %eax
-; X32-NEXT: vpextrd $3, %xmm1, %ecx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %ecx
-; X32-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X32-NEXT: vpsrld $22, %xmm0, %xmm0
-; X32-NEXT: popl %esi
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
 ; X64: # BB#0:
-; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,4294967295,4294967295,32767]
-; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X64-NEXT: vpextrd $1, %xmm0, %eax
-; X64-NEXT: vpextrd $1, %xmm1, %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: vmovd %xmm1, %esi
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %esi
-; X64-NEXT: vmovd %edx, %xmm2
-; X64-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; X64-NEXT: vpextrd $2, %xmm0, %eax
-; X64-NEXT: vpextrd $2, %xmm1, %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
-; X64-NEXT: vpextrd $3, %xmm0, %eax
-; X64-NEXT: vpextrd $3, %xmm1, %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X64-NEXT: vpsrld $22, %xmm0, %xmm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
 %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
 %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
@@ -344,26 +292,12 @@ define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1)
 define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
 ; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpsrad $31, %xmm0, %xmm1
-; X32-NEXT: vpsrld $28, %xmm1, %xmm1
-; X32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
-; X32-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X32-NEXT: vpsrld $22, %xmm0, %xmm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
 ; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpsrad $31, %xmm0, %xmm1
-; X64-NEXT: vpsrld $28, %xmm1, %xmm1
-; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
-; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X64-NEXT: vpsrld $22, %xmm0, %xmm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
 %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
 %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
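Both functions now fold to an all-zero result (a single `vxorps` of `%xmm0` with itself), presumably via improved known-bits analysis for urem/srem as the test names suggest. Only lanes 0 and 3 survive the `<0,0,3,3>` shuffle, and the masks constrain exactly those lanes: in the urem test both operands are reduced to 15 bits, so the unsigned remainder is below 2^15 and a logical right shift by 22 leaves nothing; in the srem test the -32768 mask clears the low 15 bits, so those lanes are multiples of 16 and the signed remainder by 16 is already zero. A minimal scalar sketch of that arithmetic in C (function names and loop bounds below are illustrative, not taken from the test file):

```c
#include <assert.h>
#include <stdint.h>

/* Scalar model of lanes 0 and 3 of knownbits_mask_urem_shuffle_lshr:
   both operands are masked to 15 bits, so x % y < 2^15 and shifting the
   remainder right by 22 always yields 0 (y is nonzero by assumption,
   matching LLVM's rule that urem by zero is undefined). */
static uint32_t urem_case(uint32_t a, uint32_t b) {
    uint32_t x = a & 32767u;
    uint32_t y = b & 32767u;
    return (x % y) >> 22;
}

/* Scalar model of lanes 0 and 3 of knownbits_mask_srem_shuffle_lshr:
   the -32768 mask clears bits 0..14, so x is a multiple of 16 and the
   signed remainder by 16 is 0; the shift by 22 is then trivially 0. */
static uint32_t srem_case(int32_t a) {
    int32_t x = a & ~0x7FFF;   /* same as masking with -32768 */
    int32_t r = x % 16;        /* exact multiple of 16 -> remainder 0 */
    return (uint32_t)r >> 22;
}

int main(void) {
    for (uint32_t a = 0; a < 1000000u; a += 12345u)
        for (uint32_t b = 1u; b < 32768u; b += 1111u)
            assert(urem_case(a, b) == 0u);
    for (int64_t a = INT32_MIN; a <= INT32_MAX; a += 9876543)
        assert(srem_case((int32_t)a) == 0u);
    return 0;
}
```

The vector versions behave the same way lane by lane, which is why both the X32 and X64 check blocks reduce to `vxorps` followed by the return.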