Skip to content

Commit 7656902

Browse files
authored
[X86] Fold (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16)))))) -> (v4i32 bitcast (v8f16 scalar_to_vector)) (#123338)
This pattern tends to appear during f16 -> f32 promotion. Partially addresses the unnecessary XMM->GPR->XMM moves when working with f16 types (#107086).
1 parent 8ae1cb2 commit 7656902

13 files changed

+70
-160
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58574,6 +58574,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5857458574

5857558575
static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG,
5857658576
const X86Subtarget &Subtarget) {
58577+
using namespace SDPatternMatch;
5857758578
EVT VT = N->getValueType(0);
5857858579
SDValue Src = N->getOperand(0);
5857958580
SDLoc DL(N);
@@ -58641,6 +58642,16 @@ static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG,
5864158642
return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, SrcOp);
5864258643
}
5864358644

58645+
if (VT == MVT::v4i32) {
58646+
SDValue HalfSrc;
58647+
// Combine (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16))))))
58648+
// to remove XMM->GPR->XMM moves.
58649+
if (sd_match(Src, m_AnyExt(m_BitCast(
58650+
m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc))))))
58651+
return DAG.getBitcast(
58652+
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc));
58653+
}
58654+
5864458655
// See if we're broadcasting the scalar value, in which case just reuse that.
5864558656
// Ensure the same SDValue from the SDNode use is being used.
5864658657
if (VT.getScalarType() == Src.getValueType())

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 22 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -708,10 +708,8 @@ define <2 x bfloat> @pr62997(bfloat %a, bfloat %b) {
708708
;
709709
; BF16-LABEL: pr62997:
710710
; BF16: # %bb.0:
711-
; BF16-NEXT: vpextrw $0, %xmm0, %eax
712-
; BF16-NEXT: vpextrw $0, %xmm1, %ecx
713-
; BF16-NEXT: vmovd %eax, %xmm0
714-
; BF16-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
711+
; BF16-NEXT: vpextrw $0, %xmm1, %eax
712+
; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
715713
; BF16-NEXT: retq
716714
;
717715
; FP16-LABEL: pr62997:
@@ -1652,66 +1650,63 @@ define <8 x bfloat> @fptrunc_v8f64(<8 x double> %a) nounwind {
16521650
; AVXNC-NEXT: pushq %r12
16531651
; AVXNC-NEXT: pushq %rbx
16541652
; AVXNC-NEXT: subq $168, %rsp
1655-
; AVXNC-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1653+
; AVXNC-NEXT: vmovups %ymm1, (%rsp) # 32-byte Spill
16561654
; AVXNC-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
16571655
; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
16581656
; AVXNC-NEXT: vzeroupper
16591657
; AVXNC-NEXT: callq __truncdfbf2@PLT
16601658
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1661-
; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1659+
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1660+
; AVXNC-NEXT: # xmm0 = mem[1,0]
1661+
; AVXNC-NEXT: callq __truncdfbf2@PLT
1662+
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1663+
; AVXNC-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
16621664
; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0
1663-
; AVXNC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1665+
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1666+
; AVXNC-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
16641667
; AVXNC-NEXT: vzeroupper
16651668
; AVXNC-NEXT: callq __truncdfbf2@PLT
1666-
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1667-
; AVXNC-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
1668-
; AVXNC-NEXT: # xmm0 = mem[1,0]
1669-
; AVXNC-NEXT: callq __truncdfbf2@PLT
1670-
; AVXNC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
1671-
; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1669+
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1670+
; AVXNC-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
16721671
; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
16731672
; AVXNC-NEXT: vzeroupper
16741673
; AVXNC-NEXT: callq __truncdfbf2@PLT
16751674
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1676-
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1675+
; AVXNC-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
16771676
; AVXNC-NEXT: # xmm0 = mem[1,0]
16781677
; AVXNC-NEXT: callq __truncdfbf2@PLT
16791678
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1680-
; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1679+
; AVXNC-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
16811680
; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0
16821681
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
16831682
; AVXNC-NEXT: vzeroupper
16841683
; AVXNC-NEXT: callq __truncdfbf2@PLT
1685-
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1684+
; AVXNC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
16861685
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
16871686
; AVXNC-NEXT: # xmm0 = mem[1,0]
16881687
; AVXNC-NEXT: callq __truncdfbf2@PLT
1689-
; AVXNC-NEXT: vpextrw $0, %xmm0, %eax
1690-
; AVXNC-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1691-
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1688+
; AVXNC-NEXT: vpextrw $0, %xmm0, %ebx
1689+
; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
16921690
; AVXNC-NEXT: vpextrw $0, %xmm0, %ebp
16931691
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
16941692
; AVXNC-NEXT: vpextrw $0, %xmm0, %r14d
16951693
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
16961694
; AVXNC-NEXT: vpextrw $0, %xmm0, %r15d
1697-
; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
1695+
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
16981696
; AVXNC-NEXT: vpextrw $0, %xmm0, %r12d
16991697
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
17001698
; AVXNC-NEXT: vpextrw $0, %xmm0, %r13d
17011699
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1702-
; AVXNC-NEXT: vpextrw $0, %xmm0, %ebx
1703-
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1704-
; AVXNC-NEXT: # xmm0 = mem[1,0]
17051700
; AVXNC-NEXT: callq __truncdfbf2@PLT
17061701
; AVXNC-NEXT: vpextrw $0, %xmm0, %eax
1707-
; AVXNC-NEXT: vmovd %ebx, %xmm0
1708-
; AVXNC-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
1709-
; AVXNC-NEXT: vpinsrw $2, %r13d, %xmm0, %xmm0
1702+
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1703+
; AVXNC-NEXT: vpinsrw $1, %r13d, %xmm0, %xmm0
1704+
; AVXNC-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
17101705
; AVXNC-NEXT: vpinsrw $3, %r12d, %xmm0, %xmm0
17111706
; AVXNC-NEXT: vpinsrw $4, %r15d, %xmm0, %xmm0
17121707
; AVXNC-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0
17131708
; AVXNC-NEXT: vpinsrw $6, %ebp, %xmm0, %xmm0
1714-
; AVXNC-NEXT: vpinsrw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
1709+
; AVXNC-NEXT: vpinsrw $7, %ebx, %xmm0, %xmm0
17151710
; AVXNC-NEXT: addq $168, %rsp
17161711
; AVXNC-NEXT: popq %rbx
17171712
; AVXNC-NEXT: popq %r12

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -133,11 +133,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
133133
;
134134
; AVX512-LABEL: complex_canonicalize_fmul_half:
135135
; AVX512: # %bb.0: # %entry
136-
; AVX512-NEXT: vpextrw $0, %xmm1, %eax
137-
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
138-
; AVX512-NEXT: vmovd %ecx, %xmm0
139136
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
140-
; AVX512-NEXT: vmovd %eax, %xmm1
141137
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
142138
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
143139
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0

llvm/test/CodeGen/X86/cvt16.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,7 @@ define float @test2(ptr nocapture %src) nounwind {
5959
;
6060
; F16C-LABEL: test2:
6161
; F16C: # %bb.0:
62-
; F16C-NEXT: movzwl (%rdi), %eax
63-
; F16C-NEXT: vmovd %eax, %xmm0
62+
; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
6463
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
6564
; F16C-NEXT: retq
6665
;
@@ -119,8 +118,7 @@ define double @test4(ptr nocapture %src) nounwind {
119118
;
120119
; F16C-LABEL: test4:
121120
; F16C: # %bb.0:
122-
; F16C-NEXT: movzwl (%rdi), %eax
123-
; F16C-NEXT: vmovd %eax, %xmm0
121+
; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
124122
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
125123
; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
126124
; F16C-NEXT: retq

llvm/test/CodeGen/X86/fp-roundeven.ll

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,6 @@ define half @roundeven_f16(half %h) {
5050
;
5151
; AVX512F-LABEL: roundeven_f16:
5252
; AVX512F: ## %bb.0: ## %entry
53-
; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
54-
; AVX512F-NEXT: vmovd %eax, %xmm0
5553
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5654
; AVX512F-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
5755
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0

llvm/test/CodeGen/X86/fp16-libcalls.ll

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@
99
define void @test_half_ceil(half %a0, ptr %p0) nounwind {
1010
; F16C-LABEL: test_half_ceil:
1111
; F16C: # %bb.0:
12-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
13-
; F16C-NEXT: vmovd %eax, %xmm0
1412
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1513
; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
1614
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -107,8 +105,6 @@ define void @test_half_cos(half %a0, ptr %p0) nounwind {
107105
; F16C: # %bb.0:
108106
; F16C-NEXT: pushq %rbx
109107
; F16C-NEXT: movq %rdi, %rbx
110-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
111-
; F16C-NEXT: vmovd %eax, %xmm0
112108
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
113109
; F16C-NEXT: callq cosf@PLT
114110
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -168,8 +164,6 @@ define void @test_half_exp(half %a0, ptr %p0) nounwind {
168164
; F16C: # %bb.0:
169165
; F16C-NEXT: pushq %rbx
170166
; F16C-NEXT: movq %rdi, %rbx
171-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
172-
; F16C-NEXT: vmovd %eax, %xmm0
173167
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
174168
; F16C-NEXT: callq expf@PLT
175169
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -229,8 +223,6 @@ define void @test_half_exp2(half %a0, ptr %p0) nounwind {
229223
; F16C: # %bb.0:
230224
; F16C-NEXT: pushq %rbx
231225
; F16C-NEXT: movq %rdi, %rbx
232-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
233-
; F16C-NEXT: vmovd %eax, %xmm0
234226
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
235227
; F16C-NEXT: callq exp2f@PLT
236228
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -290,8 +282,6 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
290282
; F16C: # %bb.0:
291283
; F16C-NEXT: pushq %rbx
292284
; F16C-NEXT: movq %rdi, %rbx
293-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
294-
; F16C-NEXT: vmovd %eax, %xmm0
295285
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
296286
; F16C-NEXT: callq exp10f@PLT
297287
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -349,8 +339,6 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
349339
define void @test_half_fabs(half %a0, ptr %p0) nounwind {
350340
; F16C-LABEL: test_half_fabs:
351341
; F16C: # %bb.0:
352-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
353-
; F16C-NEXT: vmovd %eax, %xmm0
354342
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
355343
; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
356344
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -392,8 +380,6 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
392380
define void @test_half_floor(half %a0, ptr %p0) nounwind {
393381
; F16C-LABEL: test_half_floor:
394382
; F16C: # %bb.0:
395-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
396-
; F16C-NEXT: vmovd %eax, %xmm0
397383
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
398384
; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
399385
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -447,14 +433,8 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
447433
; F16C: # %bb.0:
448434
; F16C-NEXT: pushq %rbx
449435
; F16C-NEXT: movq %rdi, %rbx
450-
; F16C-NEXT: vpextrw $0, %xmm2, %eax
451-
; F16C-NEXT: vpextrw $0, %xmm1, %ecx
452-
; F16C-NEXT: vpextrw $0, %xmm0, %edx
453-
; F16C-NEXT: vmovd %edx, %xmm0
454436
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
455-
; F16C-NEXT: vmovd %ecx, %xmm1
456437
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
457-
; F16C-NEXT: vmovd %eax, %xmm2
458438
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
459439
; F16C-NEXT: callq fmaf@PLT
460440
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -542,8 +522,6 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
542522
define void @test_half_fneg(half %a0, ptr %p0) nounwind {
543523
; F16C-LABEL: test_half_fneg:
544524
; F16C: # %bb.0:
545-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
546-
; F16C-NEXT: vmovd %eax, %xmm0
547525
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
548526
; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
549527
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -587,8 +565,6 @@ define void @test_half_log(half %a0, ptr %p0) nounwind {
587565
; F16C: # %bb.0:
588566
; F16C-NEXT: pushq %rbx
589567
; F16C-NEXT: movq %rdi, %rbx
590-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
591-
; F16C-NEXT: vmovd %eax, %xmm0
592568
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
593569
; F16C-NEXT: callq logf@PLT
594570
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -648,8 +624,6 @@ define void @test_half_log2(half %a0, ptr %p0) nounwind {
648624
; F16C: # %bb.0:
649625
; F16C-NEXT: pushq %rbx
650626
; F16C-NEXT: movq %rdi, %rbx
651-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
652-
; F16C-NEXT: vmovd %eax, %xmm0
653627
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
654628
; F16C-NEXT: callq log2f@PLT
655629
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -709,8 +683,6 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind {
709683
; F16C: # %bb.0:
710684
; F16C-NEXT: pushq %rbx
711685
; F16C-NEXT: movq %rdi, %rbx
712-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
713-
; F16C-NEXT: vmovd %eax, %xmm0
714686
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
715687
; F16C-NEXT: callq log10f@PLT
716688
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -768,8 +740,6 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind {
768740
define void @test_half_nearbyint(half %a0, ptr %p0) nounwind {
769741
; F16C-LABEL: test_half_nearbyint:
770742
; F16C: # %bb.0:
771-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
772-
; F16C-NEXT: vmovd %eax, %xmm0
773743
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
774744
; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
775745
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -823,11 +793,7 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
823793
; F16C: # %bb.0:
824794
; F16C-NEXT: pushq %rbx
825795
; F16C-NEXT: movq %rdi, %rbx
826-
; F16C-NEXT: vpextrw $0, %xmm1, %eax
827-
; F16C-NEXT: vpextrw $0, %xmm0, %ecx
828-
; F16C-NEXT: vmovd %ecx, %xmm0
829796
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
830-
; F16C-NEXT: vmovd %eax, %xmm1
831797
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
832798
; F16C-NEXT: callq powf@PLT
833799
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -907,8 +873,6 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
907873
; F16C: # %bb.0:
908874
; F16C-NEXT: pushq %rbx
909875
; F16C-NEXT: movq %rsi, %rbx
910-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
911-
; F16C-NEXT: vmovd %eax, %xmm0
912876
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
913877
; F16C-NEXT: callq __powisf2@PLT
914878
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -976,8 +940,6 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
976940
define void @test_half_rint(half %a0, ptr %p0) nounwind {
977941
; F16C-LABEL: test_half_rint:
978942
; F16C: # %bb.0:
979-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
980-
; F16C-NEXT: vmovd %eax, %xmm0
981943
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
982944
; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
983945
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -1031,8 +993,6 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind {
1031993
; F16C: # %bb.0:
1032994
; F16C-NEXT: pushq %rbx
1033995
; F16C-NEXT: movq %rdi, %rbx
1034-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
1035-
; F16C-NEXT: vmovd %eax, %xmm0
1036996
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1037997
; F16C-NEXT: callq sinf@PLT
1038998
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -1090,8 +1050,6 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind {
10901050
define void @test_half_sqrt(half %a0, ptr %p0) nounwind {
10911051
; F16C-LABEL: test_half_sqrt:
10921052
; F16C: # %bb.0:
1093-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
1094-
; F16C-NEXT: vmovd %eax, %xmm0
10951053
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
10961054
; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
10971055
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -1146,8 +1104,6 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind {
11461104
; F16C: # %bb.0:
11471105
; F16C-NEXT: pushq %rbx
11481106
; F16C-NEXT: movq %rdi, %rbx
1149-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
1150-
; F16C-NEXT: vmovd %eax, %xmm0
11511107
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
11521108
; F16C-NEXT: callq tanf@PLT
11531109
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -1205,8 +1161,6 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind {
12051161
define void @test_half_trunc(half %a0, ptr %p0) nounwind {
12061162
; F16C-LABEL: test_half_trunc:
12071163
; F16C: # %bb.0:
1208-
; F16C-NEXT: vpextrw $0, %xmm0, %eax
1209-
; F16C-NEXT: vmovd %eax, %xmm0
12101164
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
12111165
; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
12121166
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0

llvm/test/CodeGen/X86/half-darwin.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,7 @@ define float @extendhfsf(ptr %ptr) nounwind {
7676
;
7777
; CHECK-F16C-LABEL: extendhfsf:
7878
; CHECK-F16C: ## %bb.0:
79-
; CHECK-F16C-NEXT: movzwl (%rdi), %eax
80-
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
79+
; CHECK-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
8180
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
8281
; CHECK-F16C-NEXT: retq
8382
;

llvm/test/CodeGen/X86/half-fp80-darwin.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,7 @@ define void @extendhfxf(ptr %outptr, ptr %inptr) nounwind {
1919
;
2020
; CHECK-F16C-LABEL: extendhfxf:
2121
; CHECK-F16C: ## %bb.0:
22-
; CHECK-F16C-NEXT: movzwl (%rsi), %eax
23-
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
22+
; CHECK-F16C-NEXT: vpinsrw $0, (%rsi), %xmm0, %xmm0
2423
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
2524
; CHECK-F16C-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
2625
; CHECK-F16C-NEXT: flds -{{[0-9]+}}(%rsp)

0 commit comments

Comments
 (0)