Skip to content

Commit 2011ad0

Browse files
committed
[X86][FP16] Do not generate VBROADCAST for fp16
We cannot lower VBROADCAST i16 under AVX1, so broadcasts of f16 vectors are now only generated when AVX2 is available. Fixes llvm#63114. Differential Revision: https://reviews.llvm.org/D152350
1 parent 405faef commit 2011ad0

File tree

3 files changed

+228
-67
lines changed

3 files changed

+228
-67
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15002,9 +15002,10 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
1500215002
SDValue V2, ArrayRef<int> Mask,
1500315003
const X86Subtarget &Subtarget,
1500415004
SelectionDAG &DAG) {
15005+
MVT EltVT = VT.getVectorElementType();
1500515006
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
15006-
(Subtarget.hasAVX() && VT.isFloatingPoint()) ||
15007-
(Subtarget.hasAVX2() && VT.isInteger())))
15007+
(Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
15008+
(Subtarget.hasAVX2() && (VT.isInteger() || EltVT == MVT::f16))))
1500815009
return SDValue();
1500915010

1501015011
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise

llvm/test/CodeGen/X86/half.ll

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,4 +2110,113 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
21102110
ret <8 x half> %3
21112111
}
21122112

2113+
define void @pr63114() {
2114+
; CHECK-LIBCALL-LABEL: pr63114:
2115+
; CHECK-LIBCALL: # %bb.0:
2116+
; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4
2117+
; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7]
2118+
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2119+
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2120+
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0
2121+
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2122+
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0
2123+
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2124+
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0
2125+
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2126+
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm0
2127+
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7]
2128+
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
2129+
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm6
2130+
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm6
2131+
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm6
2132+
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6
2133+
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5]
2134+
; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3,0,3]
2135+
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5]
2136+
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm4
2137+
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm4
2138+
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4
2139+
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4
2140+
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7
2141+
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7
2142+
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7
2143+
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7
2144+
; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0
2145+
; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32
2146+
; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48
2147+
; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16
2148+
; CHECK-LIBCALL-NEXT: retq
2149+
;
2150+
; BWON-F16C-LABEL: pr63114:
2151+
; BWON-F16C: # %bb.0:
2152+
; BWON-F16C-NEXT: vmovdqu (%rax), %xmm0
2153+
; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm1
2154+
; BWON-F16C-NEXT: vbroadcastss (%rax), %xmm2
2155+
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2156+
; BWON-F16C-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0]
2157+
; BWON-F16C-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
2158+
; BWON-F16C-NEXT: vpsllq $48, %xmm3, %xmm4
2159+
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2160+
; BWON-F16C-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
2161+
; BWON-F16C-NEXT: vpor %xmm3, %xmm2, %xmm2
2162+
; BWON-F16C-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,3],xmm1[2,0]
2163+
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3],xmm1[4,5,6,7]
2164+
; BWON-F16C-NEXT: vpor %xmm3, %xmm1, %xmm1
2165+
; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2166+
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,1,3,3,4,5,6,7]
2167+
; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,1]
2168+
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2169+
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm3[7]
2170+
; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2171+
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3],xmm0[4,5,6,7]
2172+
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm3[7]
2173+
; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2174+
; BWON-F16C-NEXT: vmovups %ymm0, 0
2175+
; BWON-F16C-NEXT: vmovups %ymm1, 32
2176+
; BWON-F16C-NEXT: vzeroupper
2177+
; BWON-F16C-NEXT: retq
2178+
;
2179+
; CHECK-I686-LABEL: pr63114:
2180+
; CHECK-I686: # %bb.0:
2181+
; CHECK-I686-NEXT: movdqu (%eax), %xmm6
2182+
; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7]
2183+
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2184+
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2185+
; CHECK-I686-NEXT: pand %xmm1, %xmm0
2186+
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2187+
; CHECK-I686-NEXT: por %xmm2, %xmm0
2188+
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2189+
; CHECK-I686-NEXT: pand %xmm3, %xmm0
2190+
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2191+
; CHECK-I686-NEXT: por %xmm4, %xmm0
2192+
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7]
2193+
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2194+
; CHECK-I686-NEXT: pand %xmm1, %xmm5
2195+
; CHECK-I686-NEXT: por %xmm2, %xmm5
2196+
; CHECK-I686-NEXT: pand %xmm3, %xmm5
2197+
; CHECK-I686-NEXT: por %xmm4, %xmm5
2198+
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5]
2199+
; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3]
2200+
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
2201+
; CHECK-I686-NEXT: pand %xmm1, %xmm6
2202+
; CHECK-I686-NEXT: por %xmm2, %xmm6
2203+
; CHECK-I686-NEXT: pand %xmm3, %xmm6
2204+
; CHECK-I686-NEXT: por %xmm4, %xmm6
2205+
; CHECK-I686-NEXT: pand %xmm1, %xmm7
2206+
; CHECK-I686-NEXT: por %xmm2, %xmm7
2207+
; CHECK-I686-NEXT: pand %xmm3, %xmm7
2208+
; CHECK-I686-NEXT: por %xmm4, %xmm7
2209+
; CHECK-I686-NEXT: movdqu %xmm7, 0
2210+
; CHECK-I686-NEXT: movdqu %xmm6, 32
2211+
; CHECK-I686-NEXT: movdqu %xmm5, 48
2212+
; CHECK-I686-NEXT: movdqu %xmm0, 16
2213+
; CHECK-I686-NEXT: retl
2214+
%1 = load <24 x half>, ptr poison, align 2
2215+
%2 = shufflevector <24 x half> %1, <24 x half> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
2216+
%3 = shufflevector <8 x half> %2, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2217+
%4 = shufflevector <16 x half> poison, <16 x half> %3, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
2218+
store <32 x half> %4, ptr null, align 2
2219+
ret void
2220+
}
2221+
21132222
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)