Skip to content

Commit 79a8566

Browse files
committed
[X86] Fold (v4i32 (scalar_to_vector (i32 (zext (bitcast (f16)))))) -> (v4i32 bitcast (shuffle (v8f16 scalar_to_vector)))
Extension to #123338
1 parent b8b35b9 commit 79a8566

9 files changed

+186
-368
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59595,12 +59595,20 @@ static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG,
5959559595

5959659596
if (VT == MVT::v4i32) {
5959759597
SDValue HalfSrc;
59598-
// Combine (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16))))))
59598+
// Combine (v4i32 (scalar_to_vector (i32 (a/zext (bitcast (f16))))))
5959959599
// to remove XMM->GPR->XMM moves.
5960059600
if (sd_match(Src, m_AnyExt(m_BitCast(
5960159601
m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc))))))
5960259602
return DAG.getBitcast(
5960359603
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc));
59604+
if (sd_match(Src, m_ZExt(m_BitCast(m_AllOf(m_SpecificVT(MVT::f16),
59605+
m_Value(HalfSrc)))))) {
59606+
SDValue R = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc);
59607+
R = DAG.getVectorShuffle(MVT::v8f16, DL, R,
59608+
getZeroVector(MVT::v8f16, Subtarget, DAG, DL),
59609+
{0, 8, -1, -1, -1, -1, -1, -1});
59610+
return DAG.getBitcast(VT, R);
59611+
}
5960459612
}
5960559613

5960659614
// See if we're broadcasting the scalar value, in which case just reuse that.

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,11 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
4343
;
4444
; AVX512-LABEL: v_test_canonicalize__half:
4545
; AVX512: # %bb.0: # %entry
46-
; AVX512-NEXT: movzwl (%rdi), %eax
47-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
48-
; AVX512-NEXT: vmovd %ecx, %xmm0
46+
; AVX512-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
47+
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
4948
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
50-
; AVX512-NEXT: vmovd %eax, %xmm1
51-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
52-
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
49+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
50+
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
5351
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
5452
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
5553
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -144,9 +142,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
144142
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
145143
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
146144
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
147-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
148-
; AVX512-NEXT: vmovd %eax, %xmm2
149-
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
145+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
150146
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
151147
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
152148
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
@@ -228,9 +224,7 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
228224
; AVX512-LABEL: v_test_canonicalize_v2half:
229225
; AVX512: # %bb.0: # %entry
230226
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
231-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
232-
; AVX512-NEXT: vmovd %eax, %xmm1
233-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
227+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
234228
; AVX512-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
235229
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
236230
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm2

llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1840,9 +1840,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
18401840
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
18411841
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
18421842
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
1843-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
1844-
; AVX512-NEXT: vmovd %eax, %xmm1
1845-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm9
1843+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm9
18461844
; AVX512-NEXT: vmulss %xmm0, %xmm9, %xmm0
18471845
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
18481846
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1

0 commit comments

Comments
 (0)