@@ -5045,6 +5045,64 @@ define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_
5045
5045
ret <32 x i8 > %5
5046
5046
}
5047
5047
5048
+ ; PR116931
+ ; Applies a single-input <32 x i8> byte shuffle to %a0 and stores the result
+ ; to %out with 32-byte alignment. The mask places source byte 8*j+i at result
+ ; byte 4*i+j (i in 0..7, j in 0..3), i.e. it interleaves the four 8-byte
+ ; groups of %a0 byte by byte.
+ ; The four check blocks below expect the same sequence, apart from
+ ; vextractf128 versus vextracti128 for the upper-half extract. Both 128-bit
+ ; halves are byte-shuffled with one shared vpshufb mask, interleaved at word
+ ; granularity with vpunpcklwd/vpunpckhwd, and written with two 16-byte stores.
+ ; NOTE(review) the check lines look autogenerated - presumably they should be
+ ; regenerated with the update script rather than edited by hand; confirm.
5049
+ define void @shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31 (ptr %out , <32 x i8 > %a0 ) {
5050
+ ; AVX1-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5051
+ ; AVX1: # %bb.0:
5052
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5053
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5054
+ ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5055
+ ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5056
+ ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5057
+ ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5058
+ ; AVX1-NEXT: vmovdqa %xmm0, 16(%rdi)
5059
+ ; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
5060
+ ; AVX1-NEXT: vzeroupper
5061
+ ; AVX1-NEXT: retq
5062
+ ;
5063
+ ; AVX2OR512VL-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5064
+ ; AVX2OR512VL: # %bb.0:
5065
+ ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
5066
+ ; AVX2OR512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5067
+ ; AVX2OR512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5068
+ ; AVX2OR512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5069
+ ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5070
+ ; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5071
+ ; AVX2OR512VL-NEXT: vmovdqa %xmm0, 16(%rdi)
5072
+ ; AVX2OR512VL-NEXT: vmovdqa %xmm2, (%rdi)
5073
+ ; AVX2OR512VL-NEXT: vzeroupper
5074
+ ; AVX2OR512VL-NEXT: retq
5075
+ ;
5076
+ ; XOPAVX1-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5077
+ ; XOPAVX1: # %bb.0:
5078
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5079
+ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5080
+ ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5081
+ ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5082
+ ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5083
+ ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5084
+ ; XOPAVX1-NEXT: vmovdqa %xmm0, 16(%rdi)
5085
+ ; XOPAVX1-NEXT: vmovdqa %xmm2, (%rdi)
5086
+ ; XOPAVX1-NEXT: vzeroupper
5087
+ ; XOPAVX1-NEXT: retq
5088
+ ;
5089
+ ; XOPAVX2-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5090
+ ; XOPAVX2: # %bb.0:
5091
+ ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5092
+ ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5093
+ ; XOPAVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5094
+ ; XOPAVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5095
+ ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5096
+ ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5097
+ ; XOPAVX2-NEXT: vmovdqa %xmm0, 16(%rdi)
5098
+ ; XOPAVX2-NEXT: vmovdqa %xmm2, (%rdi)
5099
+ ; XOPAVX2-NEXT: vzeroupper
5100
+ ; XOPAVX2-NEXT: retq
+ ; Test input. The second shuffle operand is poison and every mask index is
+ ; below 32, so only bytes of %a0 are selected.
5101
+ %r = shufflevector <32 x i8 > %a0 , <32 x i8 > poison, <32 x i32 > <i32 0 , i32 8 , i32 16 , i32 24 , i32 1 , i32 9 , i32 17 , i32 25 , i32 2 , i32 10 , i32 18 , i32 26 , i32 3 , i32 11 , i32 19 , i32 27 , i32 4 , i32 12 , i32 20 , i32 28 , i32 5 , i32 13 , i32 21 , i32 29 , i32 6 , i32 14 , i32 22 , i32 30 , i32 7 , i32 15 , i32 23 , i32 31 >
5102
+ store <32 x i8 > %r , ptr %out , align 32
5103
+ ret void
5104
+ }
5105
+
5048
5106
define <4 x i64 > @PR28136 (<32 x i8 > %a0 , <32 x i8 > %a1 ) {
5049
5107
; AVX1-LABEL: PR28136:
5050
5108
; AVX1: # %bb.0:
0 commit comments