@@ -2110,4 +2110,113 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
2110
2110
ret <8 x half > %3
2111
2111
}
2112
2112
2113
; pr63114 -- codegen test for a chain of <N x half> shuffles.
; NOTE(review): the name suggests a regression test for LLVM issue 63114; the
; issue itself is not visible here -- confirm before relying on that.
;
; IR under test (at the bottom of the function):
;   %1  loads <24 x half> from a poison pointer.
;   %2  keeps every third element of %1, starting at index 2 (2, 5, ..., 23).
;   %3  concatenates %2 with eight copies of half 0xH3C00 (1.0; 0x3C00 == 15360).
;   %4  interleaves into <32 x half>: each group of four lanes is
;       [poison, poison, %3[i], %3[8+i]], so lane 3 of every group is 1.0.
;   The result is stored to a null pointer with align 2.
;
; The three ';'-prefixed blocks are the expected assembly for the FileCheck
; prefixes CHECK-LIBCALL, BWON-F16C and CHECK-I686. The RUN lines that define
; those prefixes are outside this view. NOTE(review): the {{.*#+}} patterns look
; like update_llc_test_checks.py output -- presumably regenerate with the script
; rather than editing by hand; confirm against the file header.
+ define void @pr63114 () {
2114
+ ; CHECK-LIBCALL-LABEL: pr63114:
2115
+ ; CHECK-LIBCALL: # %bb.0:
2116
+ ; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4
2117
+ ; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7]
2118
+ ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2119
+ ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2120
+ ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0
2121
+ ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2122
+ ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0
2123
+ ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2124
+ ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0
2125
+ ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2126
+ ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm0
2127
+ ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7]
2128
+ ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
2129
+ ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm6
2130
+ ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm6
2131
+ ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm6
2132
+ ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6
2133
+ ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5]
2134
+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3,0,3]
2135
+ ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5]
2136
+ ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm4
2137
+ ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm4
2138
+ ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4
2139
+ ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4
2140
+ ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7
2141
+ ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7
2142
+ ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7
2143
+ ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7
2144
+ ; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0
2145
+ ; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32
2146
+ ; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48
2147
+ ; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16
2148
+ ; CHECK-LIBCALL-NEXT: retq
2149
+ ;
2150
+ ; BWON-F16C-LABEL: pr63114:
2151
+ ; BWON-F16C: # %bb.0:
2152
+ ; BWON-F16C-NEXT: vmovdqu (%rax), %xmm0
2153
+ ; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm1
2154
+ ; BWON-F16C-NEXT: vbroadcastss (%rax), %xmm2
2155
+ ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2156
+ ; BWON-F16C-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0]
2157
+ ; BWON-F16C-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
2158
+ ; BWON-F16C-NEXT: vpsllq $48, %xmm3, %xmm4
2159
+ ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2160
+ ; BWON-F16C-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
2161
+ ; BWON-F16C-NEXT: vpor %xmm3, %xmm2, %xmm2
2162
+ ; BWON-F16C-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,3],xmm1[2,0]
2163
+ ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3],xmm1[4,5,6,7]
2164
+ ; BWON-F16C-NEXT: vpor %xmm3, %xmm1, %xmm1
2165
+ ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2166
+ ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,1,3,3,4,5,6,7]
2167
+ ; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,1]
2168
+ ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2169
+ ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm3[7]
2170
+ ; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2171
+ ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3],xmm0[4,5,6,7]
2172
+ ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm3[7]
2173
+ ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2174
+ ; BWON-F16C-NEXT: vmovups %ymm0, 0
2175
+ ; BWON-F16C-NEXT: vmovups %ymm1, 32
2176
+ ; BWON-F16C-NEXT: vzeroupper
2177
+ ; BWON-F16C-NEXT: retq
2178
+ ;
2179
+ ; CHECK-I686-LABEL: pr63114:
2180
+ ; CHECK-I686: # %bb.0:
2181
+ ; CHECK-I686-NEXT: movdqu (%eax), %xmm6
2182
+ ; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7]
2183
+ ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2184
+ ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2185
+ ; CHECK-I686-NEXT: pand %xmm1, %xmm0
2186
+ ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2187
+ ; CHECK-I686-NEXT: por %xmm2, %xmm0
2188
+ ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2189
+ ; CHECK-I686-NEXT: pand %xmm3, %xmm0
2190
+ ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2191
+ ; CHECK-I686-NEXT: por %xmm4, %xmm0
2192
+ ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7]
2193
+ ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2194
+ ; CHECK-I686-NEXT: pand %xmm1, %xmm5
2195
+ ; CHECK-I686-NEXT: por %xmm2, %xmm5
2196
+ ; CHECK-I686-NEXT: pand %xmm3, %xmm5
2197
+ ; CHECK-I686-NEXT: por %xmm4, %xmm5
2198
+ ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5]
2199
+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3]
2200
+ ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
2201
+ ; CHECK-I686-NEXT: pand %xmm1, %xmm6
2202
+ ; CHECK-I686-NEXT: por %xmm2, %xmm6
2203
+ ; CHECK-I686-NEXT: pand %xmm3, %xmm6
2204
+ ; CHECK-I686-NEXT: por %xmm4, %xmm6
2205
+ ; CHECK-I686-NEXT: pand %xmm1, %xmm7
2206
+ ; CHECK-I686-NEXT: por %xmm2, %xmm7
2207
+ ; CHECK-I686-NEXT: pand %xmm3, %xmm7
2208
+ ; CHECK-I686-NEXT: por %xmm4, %xmm7
2209
+ ; CHECK-I686-NEXT: movdqu %xmm7, 0
2210
+ ; CHECK-I686-NEXT: movdqu %xmm6, 32
2211
+ ; CHECK-I686-NEXT: movdqu %xmm5, 48
2212
+ ; CHECK-I686-NEXT: movdqu %xmm0, 16
2213
+ ; CHECK-I686-NEXT: retl
; Source vector: 24 halves read from a poison address (the address is irrelevant to the test).
2214
+ %1 = load <24 x half >, ptr poison, align 2
; Stride-3 extract: lanes 2, 5, 8, ..., 23 of %1.
2215
+ %2 = shufflevector <24 x half > %1 , <24 x half > poison, <8 x i32 > <i32 2 , i32 5 , i32 8 , i32 11 , i32 14 , i32 17 , i32 20 , i32 23 >
; Identity mask over both operands: %3 = %2 followed by eight half 1.0 (0xH3C00) constants.
2216
+ %3 = shufflevector <8 x half > %2 , <8 x half > <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
; 4-way interleave: mask indices 0-15 pick the poison operand, 16-31 pick %3,
; giving [poison, poison, %3[i], %3[8+i]] for i = 0..7.
2217
+ %4 = shufflevector <16 x half > poison, <16 x half > %3 , <32 x i32 > <i32 0 , i32 8 , i32 16 , i32 24 , i32 1 , i32 9 , i32 17 , i32 25 , i32 2 , i32 10 , i32 18 , i32 26 , i32 3 , i32 11 , i32 19 , i32 27 , i32 4 , i32 12 , i32 20 , i32 28 , i32 5 , i32 13 , i32 21 , i32 29 , i32 6 , i32 14 , i32 22 , i32 30 , i32 7 , i32 15 , i32 23 , i32 31 >
; Store all 32 halves to address null (absolute addresses 0/16/32/48 in the expected asm above).
2218
+ store <32 x half > %4 , ptr null , align 2
2219
+ ret void
2220
+ }
2221
+
2113
2222
; Attribute group #0: functions declared with "#0" (e.g. @maxnum_v8f16) are nounwind.
attributes #0 = { nounwind }
0 commit comments