@@ -137,3 +137,31 @@ define void @PR142995(ptr %p0, ptr %p1, ptr %p2) nounwind #0 {
137
137
}
138
138
declare <5 x i32 > @llvm.masked.load.v5i32.p0 (ptr captures(none), i32 immarg, <5 x i1 >, <5 x i32 >)
139
139
declare <64 x i32 > @llvm.masked.load.v64i32.p0 (ptr captures(none), i32 immarg, <64 x i1 >, <64 x i32 >)
140
+
141
; Regression test for PR143606.
; Loads one <4 x double> from %px (%x) and two <4 x double> from %py at byte
; offsets 0 (%y.lo) and 32 (%y.hi), then builds an <8 x double> result:
;   %lo  - shufflevector of %x/%y.lo with mask <0,5,6,3>
;   %hi  - llvm.x86.avx512.vpermi2var.pd.256 of %x/%y.hi with indices <1,4,2,7>
;   %res - concatenation of %lo and %hi (identity mask <0..7>)
; The X86 and X64 CHECK lines both expect %lo to become a vblendpd with a
; memory operand, %hi to become a vshufpd with a memory operand (no variable
; permute instruction), and the halves to be joined with vinsertf64x4.
+ define <8 x double > @PR143606 (ptr %px , ptr %py ) {
142
+ ; X86-LABEL: PR143606:
143
+ ; X86: # %bb.0:
144
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
145
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
146
+ ; X86-NEXT: vmovapd (%ecx), %ymm0
147
+ ; X86-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],mem[1,2],ymm0[3]
148
+ ; X86-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],mem[0],ymm0[2],mem[3]
149
+ ; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
150
+ ; X86-NEXT: retl
151
+ ;
152
+ ; X64-LABEL: PR143606:
153
+ ; X64: # %bb.0:
154
+ ; X64-NEXT: vmovapd (%rdi), %ymm0
155
+ ; X64-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],mem[1,2],ymm0[3]
156
+ ; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],mem[0],ymm0[2],mem[3]
157
+ ; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
158
+ ; X64-NEXT: retq
159
+ %x = load <4 x double >, ptr %px , align 32
160
+ %y.lo = load <4 x double >, ptr %py , align 32
161
+ %py.hi = getelementptr inbounds nuw i8 , ptr %py , i64 32
162
+ %y.hi = load <4 x double >, ptr %py.hi , align 32
163
+ %lo = shufflevector <4 x double > %x , <4 x double > %y.lo , <4 x i32 > <i32 0 , i32 5 , i32 6 , i32 3 >
164
+ %hi = call <4 x double > @llvm.x86.avx512.vpermi2var.pd.256 (<4 x double > %x , <4 x i64 > <i64 1 , i64 4 , i64 2 , i64 7 >, <4 x double > %y.hi )
165
+ %res = shufflevector <4 x double > %lo , <4 x double > %hi , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
166
+ ret <8 x double > %res
167
+ }
0 commit comments