Skip to content

Commit da8845f

Browse files
committed
[X86][AVX] Add PR48908 shuffle test case
1 parent aa76ceb commit da8845f

File tree

1 file changed: +151 −0 lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Lines changed: 151 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -436,6 +436,157 @@ entry:
  unreachable
}

; Reconstructed from the commit-page scrape: the interleaved diff line-number
; artifacts ("439+", "440+", ...) that corrupted the listing have been removed;
; every real content line is preserved verbatim (FileCheck lines are load-bearing).
; Regression test for PR48908: three shufflevector trees over <4 x double>
; %v0/%v1/%v2 feeding three independent 32-byte-aligned stores; the CHECK
; blocks pin the shuffle-combined codegen per target (X86/X64) and feature
; level (AVX1/AVX2/AVX512).
define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias %out2) {
; X86-AVX1-LABEL: PR48908:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
; X86-AVX1-NEXT: vmovapd %ymm4, (%edx)
; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
; X86-AVX1-NEXT: vmovapd %ymm3, (%ecx)
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
; X86-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
; X86-AVX1-NEXT: vmovapd %ymm0, (%eax)
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: PR48908:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3
; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
; X86-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
; X86-AVX2-NEXT: vmovapd %ymm3, (%edx)
; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
; X86-AVX2-NEXT: vmovapd %ymm3, (%ecx)
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
; X86-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; X86-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
; X86-AVX2-NEXT: vmovapd %ymm0, (%eax)
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X86-AVX512-LABEL: PR48908:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; X86-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3
; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
; X86-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0]
; X86-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3
; X86-AVX512-NEXT: vmovapd %ymm3, (%edx)
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0]
; X86-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4
; X86-AVX512-NEXT: vmovapd %ymm4, (%ecx)
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u>
; X86-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0]
; X86-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0
; X86-AVX512-NEXT: vmovapd %ymm0, (%eax)
; X86-AVX512-NEXT: vzeroupper
; X86-AVX512-NEXT: retl
;
; X64-AVX1-LABEL: PR48908:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
; X64-AVX1-NEXT: vmovapd %ymm4, (%rdi)
; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
; X64-AVX1-NEXT: vmovapd %ymm3, (%rsi)
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
; X64-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
; X64-AVX1-NEXT: vmovapd %ymm0, (%rdx)
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: PR48908:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3
; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
; X64-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
; X64-AVX2-NEXT: vmovapd %ymm3, (%rdi)
; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
; X64-AVX2-NEXT: vmovapd %ymm3, (%rsi)
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
; X64-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
; X64-AVX2-NEXT: vmovapd %ymm0, (%rdx)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: PR48908:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; X64-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; X64-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3
; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
; X64-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,3,8,1]
; X64-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3
; X64-AVX512-NEXT: vmovapd %ymm3, (%rdi)
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1]
; X64-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4
; X64-AVX512-NEXT: vmovapd %ymm4, (%rsi)
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,11,u,u>
; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3]
; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0
; X64-AVX512-NEXT: vmovapd %ymm0, (%rdx)
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %t0 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  %t1 = shufflevector <4 x double> %v1, <4 x double> %v2, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  %r0 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 0, i32 3, i32 6, i32 1>
  store <4 x double> %r0, <4 x double>* %out0, align 32
  %r1 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 4, i32 7, i32 2, i32 5>
  store <4 x double> %r1, <4 x double>* %out1, align 32
  %t2 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
  %r2 = shufflevector <4 x double> %t2, <4 x double> %v2, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  store <4 x double> %r2, <4 x double>* %out2, align 32
  ret void
}
589+
439590
define <4 x i64> @concat_self_v4i64(<2 x i64> %x) {
440591
; AVX1-LABEL: concat_self_v4i64:
441592
; AVX1: # %bb.0:

0 commit comments

Comments
 (0)