Skip to content

Commit bfd8f7e

Browse files
committed
[X86] SimplifyDemandedVectorEltsForTargetNode - reduce vector width of FRSQRT/FRCP ymm nodes.
If only the lower subvector of an FRSQRT/FRCP node is demanded, reduce the vector width of the instruction to match, so that a ymm operation is narrowed to the xmm width that is actually used.
1 parent cc7b24a commit bfd8f7e

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43109,6 +43109,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4310943109
case X86ISD::FMIN:
4311043110
case X86ISD::FMAXC:
4311143111
case X86ISD::FMINC:
43112+
case X86ISD::FRSQRT:
43113+
case X86ISD::FRCP:
4311243114
// Horizontal Ops.
4311343115
case X86ISD::HADD:
4311443116
case X86ISD::HSUB:

llvm/test/CodeGen/X86/extractelement-fp.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,15 +1310,14 @@ define float @rcp_v4f32(<4 x float> %x) nounwind {
13101310
define float @rcp_v8f32(<8 x float> %x) nounwind {
13111311
; X64-LABEL: rcp_v8f32:
13121312
; X64: # %bb.0:
1313-
; X64-NEXT: vrcpps %ymm0, %ymm0
1314-
; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1313+
; X64-NEXT: vrcpss %xmm0, %xmm0, %xmm0
13151314
; X64-NEXT: vzeroupper
13161315
; X64-NEXT: retq
13171316
;
13181317
; X86-LABEL: rcp_v8f32:
13191318
; X86: # %bb.0:
13201319
; X86-NEXT: pushl %eax
1321-
; X86-NEXT: vrcpps %ymm0, %ymm0
1320+
; X86-NEXT: vrcpss %xmm0, %xmm0, %xmm0
13221321
; X86-NEXT: vmovss %xmm0, (%esp)
13231322
; X86-NEXT: flds (%esp)
13241323
; X86-NEXT: popl %eax
@@ -1351,15 +1350,14 @@ define float @rsqrt_v4f32(<4 x float> %x) nounwind {
13511350
define float @rsqrt_v8f32(<8 x float> %x) nounwind {
13521351
; X64-LABEL: rsqrt_v8f32:
13531352
; X64: # %bb.0:
1354-
; X64-NEXT: vrsqrtps %ymm0, %ymm0
1355-
; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1353+
; X64-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
13561354
; X64-NEXT: vzeroupper
13571355
; X64-NEXT: retq
13581356
;
13591357
; X86-LABEL: rsqrt_v8f32:
13601358
; X86: # %bb.0:
13611359
; X86-NEXT: pushl %eax
1362-
; X86-NEXT: vrsqrtps %ymm0, %ymm0
1360+
; X86-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
13631361
; X86-NEXT: vmovss %xmm0, (%esp)
13641362
; X86-NEXT: flds (%esp)
13651363
; X86-NEXT: popl %eax

0 commit comments

Comments
 (0)