Skip to content

Commit 8868cda

Browse files
committed
[X86] Add UDIV by uniform/non-uniform constant tests
llvm-svn: 336894
1 parent 053ff54 commit 8868cda

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed

llvm/test/CodeGen/X86/combine-udiv.ll

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,3 +324,129 @@ define <4 x i32> @combine_vec_udiv_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
   %2 = udiv <4 x i32> %x, %1
   ret <4 x i32> %2
 }
; fold (udiv x, c1)
define i32 @combine_udiv_uniform(i32 %x) {
; CHECK-LABEL: combine_udiv_uniform:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    movl $2987803337, %eax # imm = 0xB21642C9
; CHECK-NEXT:    imulq %rcx, %rax
; CHECK-NEXT:    shrq $36, %rax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %1 = udiv i32 %x, 23
  ret i32 %1
}

define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
; SSE-LABEL: combine_vec_udiv_uniform:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [25645,25645,25645,25645,25645,25645,25645,25645]
; SSE-NEXT:    pmulhuw %xmm0, %xmm1
; SSE-NEXT:    psubw %xmm1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psrlw $4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_udiv_uniform:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
  ret <8 x i16> %1
}

define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; SSE-LABEL: combine_vec_udiv_nonuniform:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    movzwl %ax, %ecx
; SSE-NEXT:    imull $25645, %ecx, %ecx # imm = 0x642D
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movzwl %ax, %eax
; SSE-NEXT:    shrl %eax
; SSE-NEXT:    addl %ecx, %eax
; SSE-NEXT:    shrl $4, %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    pextrw $1, %xmm0, %eax
; SSE-NEXT:    imull $61681, %eax, %eax # imm = 0xF0F1
; SSE-NEXT:    shrl $21, %eax
; SSE-NEXT:    pinsrw $1, %eax, %xmm1
; SSE-NEXT:    pextrw $2, %xmm0, %eax
; SSE-NEXT:    imull $8195, %eax, %eax # imm = 0x2003
; SSE-NEXT:    shrl $29, %eax
; SSE-NEXT:    pinsrw $2, %eax, %xmm1
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    shrl $3, %eax
; SSE-NEXT:    imull $9363, %eax, %eax # imm = 0x2493
; SSE-NEXT:    shrl $16, %eax
; SSE-NEXT:    pinsrw $3, %eax, %xmm1
; SSE-NEXT:    pextrw $4, %xmm0, %eax
; SSE-NEXT:    shrl $7, %eax
; SSE-NEXT:    pinsrw $4, %eax, %xmm1
; SSE-NEXT:    pextrw $5, %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %cl
; SSE-NEXT:    pinsrw $5, %ecx, %xmm1
; SSE-NEXT:    pextrw $6, %xmm0, %eax
; SSE-NEXT:    imull $32897, %eax, %eax # imm = 0x8081
; SSE-NEXT:    shrl $31, %eax
; SSE-NEXT:    pinsrw $6, %eax, %xmm1
; SSE-NEXT:    pextrw $7, %xmm0, %eax
; SSE-NEXT:    shrl $15, %eax
; SSE-NEXT:    pinsrw $7, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_udiv_nonuniform:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %ecx
; AVX-NEXT:    imull $25645, %ecx, %ecx # imm = 0x642D
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    shrl %eax
; AVX-NEXT:    addl %ecx, %eax
; AVX-NEXT:    shrl $4, %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    imull $61681, %eax, %eax # imm = 0xF0F1
; AVX-NEXT:    shrl $21, %eax
; AVX-NEXT:    vpinsrw $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $2, %xmm0, %eax
; AVX-NEXT:    imull $8195, %eax, %eax # imm = 0x2003
; AVX-NEXT:    shrl $29, %eax
; AVX-NEXT:    vpinsrw $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    shrl $3, %eax
; AVX-NEXT:    imull $9363, %eax, %eax # imm = 0x2493
; AVX-NEXT:    shrl $16, %eax
; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $4, %xmm0, %eax
; AVX-NEXT:    shrl $7, %eax
; AVX-NEXT:    vpinsrw $4, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $5, %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %cl
; AVX-NEXT:    vpinsrw $5, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $6, %xmm0, %eax
; AVX-NEXT:    imull $32897, %eax, %eax # imm = 0x8081
; AVX-NEXT:    shrl $31, %eax
; AVX-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $7, %xmm0, %eax
; AVX-NEXT:    shrl $15, %eax
; AVX-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
  ret <8 x i16> %1
}

0 commit comments

Comments
 (0)