@@ -99,3 +99,57 @@ define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
99
99
ret <16 x i8 > %3
100
100
}
101
101
declare <4 x i32 > @llvm.fshl.v4i32 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
102
+
103
; Test function @PR46178 (the name suggests a regression test for bug report
; PR46178 -- TODO confirm against the bug tracker).
;
; The IR loads two <4 x i64> vectors, truncates each to <4 x i16>, sign-extends
; the low byte of every i16 lane in place (shl 8 followed by ashr 8), and stores
; the two results to the first eight i16 slots of %0. The following eight i16
; slots of %0 are written with zeros.
;
; The comment lines with the X86 and X64 prefixes below are FileCheck
; assertions describing the expected llc output for the 32-bit and 64-bit
; targets. They appear to be autogenerated (presumably by
; utils/update_llc_test_checks.py -- verify against the file header), so they
; should be regenerated rather than edited by hand.
+ define void @PR46178 (i16* %0 ) {
104
+ ; X86-LABEL: PR46178:
105
+ ; X86: # %bb.0:
106
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
107
+ ; X86-NEXT: vmovdqu 0, %ymm0
108
+ ; X86-NEXT: vmovdqu (%eax), %ymm1
109
+ ; X86-NEXT: vpmovqw %ymm0, %xmm0
110
+ ; X86-NEXT: vpmovqw %ymm1, %xmm1
111
+ ; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
112
+ ; X86-NEXT: vpsllw $8, %ymm0, %ymm0
113
+ ; X86-NEXT: vpsraw $8, %ymm0, %ymm0
114
+ ; X86-NEXT: vmovapd {{.*#+}} ymm1 = [0,0,2,0,4,0,4,0]
115
+ ; X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
116
+ ; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
117
+ ; X86-NEXT: vmovupd %ymm1, (%eax)
118
+ ; X86-NEXT: vzeroupper
119
+ ; X86-NEXT: retl
120
+ ;
121
+ ; X64-LABEL: PR46178:
122
+ ; X64: # %bb.0:
123
+ ; X64-NEXT: vmovdqu 0, %ymm0
124
+ ; X64-NEXT: vmovdqu (%rax), %ymm1
125
+ ; X64-NEXT: vpmovqw %ymm0, %xmm0
126
+ ; X64-NEXT: vpmovqw %ymm1, %xmm1
127
+ ; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
128
+ ; X64-NEXT: vpsllw $8, %ymm0, %ymm0
129
+ ; X64-NEXT: vpsraw $8, %ymm0, %ymm0
130
+ ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
131
+ ; X64-NEXT: vmovdqa %xmm0, %xmm0
132
+ ; X64-NEXT: vmovdqu %ymm0, (%rdi)
133
+ ; X64-NEXT: vzeroupper
134
+ ; X64-NEXT: retq
135
; Source vectors: one loaded from the null pointer, one from an undef pointer.
; The addresses are placeholders; only the shape of the loads matters to the
; expected code above (presumably left over from test-case reduction).
+ %2 = load <4 x i64 >, <4 x i64 >* null , align 8
136
+ %3 = load <4 x i64 >, <4 x i64 >* undef , align 8
137
; Narrow every 64-bit lane to 16 bits.
+ %4 = trunc <4 x i64 > %2 to <4 x i16 >
138
+ %5 = trunc <4 x i64 > %3 to <4 x i16 >
139
; shl by 8 then ashr by 8 sign-extends the low 8 bits of each i16 lane.
; 'exact' is valid on the ashr because the preceding shl leaves the low
; 8 bits zero, so no set bits are shifted out.
+ %6 = shl <4 x i16 > %4 , <i16 8 , i16 8 , i16 8 , i16 8 >
140
+ %7 = shl <4 x i16 > %5 , <i16 8 , i16 8 , i16 8 , i16 8 >
141
+ %8 = ashr exact <4 x i16 > %6 , <i16 8 , i16 8 , i16 8 , i16 8 >
142
+ %9 = ashr exact <4 x i16 > %7 , <i16 8 , i16 8 , i16 8 , i16 8 >
143
; Four consecutive <4 x i16> destinations at i16 element offsets 0, 4, 8 and 12
; from %0 (16 contiguous i16 elements, 32 bytes in total).
+ %10 = bitcast i16* %0 to <4 x i16 >*
144
+ %11 = getelementptr inbounds i16 , i16* %0 , i64 4
145
+ %12 = bitcast i16* %11 to <4 x i16 >*
146
+ %13 = getelementptr inbounds i16 , i16* %0 , i64 8
147
+ %14 = bitcast i16* %13 to <4 x i16 >*
148
+ %15 = getelementptr inbounds i16 , i16* %0 , i64 12
149
+ %16 = bitcast i16* %15 to <4 x i16 >*
150
; First two slots receive the sign-extended results, last two are zeroed.
; The expected output above performs all four stores as one 32-byte ymm store.
+ store <4 x i16 > %8 , <4 x i16 >* %10 , align 2
151
+ store <4 x i16 > %9 , <4 x i16 >* %12 , align 2
152
+ store <4 x i16 > zeroinitializer , <4 x i16 >* %14 , align 2
153
+ store <4 x i16 > zeroinitializer , <4 x i16 >* %16 , align 2
154
+ ret void
155
+ }
0 commit comments