@@ -324,3 +324,129 @@ define <4 x i32> @combine_vec_udiv_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
324
324
%2 = udiv <4 x i32 > %x , %1
325
325
ret <4 x i32 > %2
326
326
}
327
; fold (udiv x, c1)
; Scalar udiv by the uniform constant 23 is lowered to a multiply-high by the
; magic constant 0xB21642C9 followed by a 36-bit right shift (no div needed).
define i32 @combine_udiv_uniform(i32 %x) {
; CHECK-LABEL: combine_udiv_uniform:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    movl $2987803337, %eax # imm = 0xB21642C9
; CHECK-NEXT:    imulq %rcx, %rax
; CHECK-NEXT:    shrq $36, %rax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %1 = udiv i32 %x, 23
  ret i32 %1
}
341
; Vector udiv by a uniform (splat) constant 23: lowered to pmulhuw with the
; magic multiplier 25645, then the standard fixup sequence
; (sub, shift-by-1, add, final shift) that recovers the full 17-bit quotient.
define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
; SSE-LABEL: combine_vec_udiv_uniform:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [25645,25645,25645,25645,25645,25645,25645,25645]
; SSE-NEXT:    pmulhuw %xmm0, %xmm1
; SSE-NEXT:    psubw %xmm1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psrlw $4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_udiv_uniform:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
  ret <8 x i16> %1
}
364
; Vector udiv by non-uniform constants: each lane gets its own magic-number
; sequence, so codegen scalarizes via pextrw/pinsrw. Notable lanes:
;   - lane 4 (÷128)  -> plain shift by 7
;   - lane 5 (÷-1 == ÷65535 unsigned) -> compare-against-0xFFFF / sete
;   - lane 7 (÷-32768 == ÷32768 unsigned) -> plain shift by 15
define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; SSE-LABEL: combine_vec_udiv_nonuniform:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    movzwl %ax, %ecx
; SSE-NEXT:    imull $25645, %ecx, %ecx # imm = 0x642D
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movzwl %ax, %eax
; SSE-NEXT:    shrl %eax
; SSE-NEXT:    addl %ecx, %eax
; SSE-NEXT:    shrl $4, %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    pextrw $1, %xmm0, %eax
; SSE-NEXT:    imull $61681, %eax, %eax # imm = 0xF0F1
; SSE-NEXT:    shrl $21, %eax
; SSE-NEXT:    pinsrw $1, %eax, %xmm1
; SSE-NEXT:    pextrw $2, %xmm0, %eax
; SSE-NEXT:    imull $8195, %eax, %eax # imm = 0x2003
; SSE-NEXT:    shrl $29, %eax
; SSE-NEXT:    pinsrw $2, %eax, %xmm1
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    shrl $3, %eax
; SSE-NEXT:    imull $9363, %eax, %eax # imm = 0x2493
; SSE-NEXT:    shrl $16, %eax
; SSE-NEXT:    pinsrw $3, %eax, %xmm1
; SSE-NEXT:    pextrw $4, %xmm0, %eax
; SSE-NEXT:    shrl $7, %eax
; SSE-NEXT:    pinsrw $4, %eax, %xmm1
; SSE-NEXT:    pextrw $5, %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %cl
; SSE-NEXT:    pinsrw $5, %ecx, %xmm1
; SSE-NEXT:    pextrw $6, %xmm0, %eax
; SSE-NEXT:    imull $32897, %eax, %eax # imm = 0x8081
; SSE-NEXT:    shrl $31, %eax
; SSE-NEXT:    pinsrw $6, %eax, %xmm1
; SSE-NEXT:    pextrw $7, %xmm0, %eax
; SSE-NEXT:    shrl $15, %eax
; SSE-NEXT:    pinsrw $7, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_udiv_nonuniform:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %ecx
; AVX-NEXT:    imull $25645, %ecx, %ecx # imm = 0x642D
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    shrl %eax
; AVX-NEXT:    addl %ecx, %eax
; AVX-NEXT:    shrl $4, %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    imull $61681, %eax, %eax # imm = 0xF0F1
; AVX-NEXT:    shrl $21, %eax
; AVX-NEXT:    vpinsrw $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $2, %xmm0, %eax
; AVX-NEXT:    imull $8195, %eax, %eax # imm = 0x2003
; AVX-NEXT:    shrl $29, %eax
; AVX-NEXT:    vpinsrw $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    shrl $3, %eax
; AVX-NEXT:    imull $9363, %eax, %eax # imm = 0x2493
; AVX-NEXT:    shrl $16, %eax
; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $4, %xmm0, %eax
; AVX-NEXT:    shrl $7, %eax
; AVX-NEXT:    vpinsrw $4, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $5, %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %cl
; AVX-NEXT:    vpinsrw $5, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $6, %xmm0, %eax
; AVX-NEXT:    imull $32897, %eax, %eax # imm = 0x8081
; AVX-NEXT:    shrl $31, %eax
; AVX-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $7, %xmm0, %eax
; AVX-NEXT:    shrl $15, %eax
; AVX-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
  ret <8 x i16> %1
}