Skip to content

Commit 0dd93c7

Browse files
committed
[X86] Fold (v4i32 (scalar_to_vector (i32 (zext (bitcast (f16)))))) -> (v4i32 bitcast (shuffle (v8f16 scalar_to_vector)))
Extension to #123338
1 parent 78f690b commit 0dd93c7

10 files changed

+187
-371
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58721,12 +58721,20 @@ static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG,
5872158721

5872258722
if (VT == MVT::v4i32) {
5872358723
SDValue HalfSrc;
58724-
// Combine (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16))))))
58724+
// Combine (v4i32 (scalar_to_vector (i32 (a/zext (bitcast (f16))))))
5872558725
// to remove XMM->GPR->XMM moves.
5872658726
if (sd_match(Src, m_AnyExt(m_BitCast(
5872758727
m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc))))))
5872858728
return DAG.getBitcast(
5872958729
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc));
58730+
if (sd_match(Src, m_ZExt(m_BitCast(m_AllOf(m_SpecificVT(MVT::f16),
58731+
m_Value(HalfSrc)))))) {
58732+
SDValue R = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc);
58733+
R = DAG.getVectorShuffle(MVT::v8f16, DL, R,
58734+
getZeroVector(MVT::v8f16, Subtarget, DAG, DL),
58735+
{0, 8, -1, -1, -1, -1, -1, -1});
58736+
return DAG.getBitcast(VT, R);
58737+
}
5873058738
}
5873158739

5873258740
// See if we're broadcasting the scalar value, in which case just reuse that.

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,11 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
4343
;
4444
; AVX512-LABEL: v_test_canonicalize__half:
4545
; AVX512: # %bb.0: # %entry
46-
; AVX512-NEXT: movzwl (%rdi), %eax
47-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
48-
; AVX512-NEXT: vmovd %ecx, %xmm0
46+
; AVX512-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
47+
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
4948
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
50-
; AVX512-NEXT: vmovd %eax, %xmm1
51-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
52-
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
49+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
50+
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
5351
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
5452
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
5553
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -144,9 +142,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
144142
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
145143
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
146144
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
147-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
148-
; AVX512-NEXT: vmovd %eax, %xmm2
149-
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
145+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
150146
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
151147
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
152148
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
@@ -228,9 +224,7 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
228224
; AVX512-LABEL: v_test_canonicalize_v2half:
229225
; AVX512: # %bb.0: # %entry
230226
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
231-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
232-
; AVX512-NEXT: vmovd %eax, %xmm1
233-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
227+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
234228
; AVX512-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
235229
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
236230
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm2

llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,9 +1840,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
18401840
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
18411841
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
18421842
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
1843-
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
1844-
; AVX512-NEXT: vmovd %eax, %xmm1
1845-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm9
1843+
; AVX512-NEXT: vcvtph2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm9
18461844
; AVX512-NEXT: vmulss %xmm0, %xmm9, %xmm0
18471845
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
18481846
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1

llvm/test/CodeGen/X86/fp-round.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,7 @@ define half @round_f16(half %h) {
5050
;
5151
; AVX512F-LABEL: round_f16:
5252
; AVX512F: # %bb.0: # %entry
53-
; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
54-
; AVX512F-NEXT: movzwl %ax, %eax
55-
; AVX512F-NEXT: vmovd %eax, %xmm0
53+
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5654
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5755
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
5856
; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)

0 commit comments

Comments
 (0)