Skip to content

[DAG] Don't split f64 constant stores if the fp imm is legal #74622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20940,8 +20940,8 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}

- if (ST->isSimple() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
ST->getOriginalAlign(), MMOFlags, AAInfo);
}

- if (CFP->getValueType(0) == MVT::f64) {
+ if (CFP->getValueType(0) == MVT::f64 &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,8 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: movw r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movt r1, #16368
; CHECK-NEXT: strd r0, r1, [sp]
; CHECK-NEXT: vmov.f64 d16, #1.000000e+00
; CHECK-NEXT: vstr d16, [sp]
; CHECK-NEXT: bl test_1double_nosplit
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: pop {r11, pc}
Expand Down Expand Up @@ -138,10 +136,8 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: movw r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movt r1, #16368
; CHECK-NEXT: strd r0, r1, [sp, #8]
; CHECK-NEXT: vmov.f64 d16, #1.000000e+00
; CHECK-NEXT: vstr d16, [sp, #8]
; CHECK-NEXT: bl test_1double_misaligned
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, pc}
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/ARM/ha-alignstack-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -300,16 +300,16 @@ entry:
ret double %call
}
; CHECK-LABEL: g2_1_call:
; CHECK: movw r0, #0
; CHECK: mov r1, #0
; CHECK: movt r0, #16352
; CHECK: str r1, [sp]
; CHECK: stmib sp, {r0, r1}
; CHECK: str r1, [sp, #12]
; CHECK: str r1, [sp, #16]
; CHECK: str r1, [sp, #20]
; CHECK: str r1, [sp, #24]
; CHECK: str r1, [sp, #28]
; CHECK: vmov.f64 d16, #5.000000e-01
; CHECK: mov r0, #0
; CHECK: str r0, [sp, #8]
; CHECK: str r0, [sp, #12]
; CHECK: str r0, [sp, #16]
; CHECK: vmov.i32 d0, #0x0
; CHECK: str r0, [sp, #20]
; CHECK: str r0, [sp, #24]
; CHECK: str r0, [sp, #28]
; CHECK: vstr d16, [sp]
; CHECK: bl g2_1

; pass in memory, alignment 8
Expand Down
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/Mips/pr49200.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@ define dso_local void @foo() #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addiusp -24
; CHECK-NEXT: li16 $2, 0
; CHECK-NEXT: sw $2, 4($sp)
; CHECK-NEXT: sw $2, 0($sp)
; CHECK-NEXT: sw $2, 12($sp)
; CHECK-NEXT: sw $2, 8($sp)
; CHECK-NEXT: mtc1 $zero, $f0
; CHECK-NEXT: mthc1 $zero, $f0
; CHECK-NEXT: sdc1 $f0, 0($sp)
; CHECK-NEXT: sdc1 $f0, 8($sp)
; CHECK-NEXT: ldc1 $f0, 0($sp)
; CHECK-NEXT: sdc1 $f0, 16($sp)
; CHECK-NEXT: addiusp 24
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -865,9 +865,9 @@ define double @f19() #0 {
; X87-NEXT: .cfi_def_cfa_offset 32
; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fld1
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: movl $1072693248, {{[0-9]+}}(%esp) # imm = 0x3FF00000
; X87-NEXT: movl $0, (%esp)
; X87-NEXT: calll fmod
; X87-NEXT: addl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/X86/ldexp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ define double @ldexp_f64(i8 zeroext %x) {
;
; WIN32-LABEL: ldexp_f64:
; WIN32: # %bb.0:
; WIN32-NEXT: subl $12, %esp
; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: pushl $1072693248 # imm = 0x3FF00000
; WIN32-NEXT: pushl $0
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: fld1
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _ldexp
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: retl
Expand Down
31 changes: 11 additions & 20 deletions llvm/test/CodeGen/X86/memset64-on-x86-32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,17 @@ define void @bork(ptr nocapture align 4 %dst) nounwind {
; SLOW_32-LABEL: bork:
; SLOW_32: # %bb.0:
; SLOW_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SLOW_32-NEXT: movl $0, 4(%eax)
; SLOW_32-NEXT: movl $0, (%eax)
; SLOW_32-NEXT: movl $0, 12(%eax)
; SLOW_32-NEXT: movl $0, 8(%eax)
; SLOW_32-NEXT: movl $0, 20(%eax)
; SLOW_32-NEXT: movl $0, 16(%eax)
; SLOW_32-NEXT: movl $0, 28(%eax)
; SLOW_32-NEXT: movl $0, 24(%eax)
; SLOW_32-NEXT: movl $0, 36(%eax)
; SLOW_32-NEXT: movl $0, 32(%eax)
; SLOW_32-NEXT: movl $0, 44(%eax)
; SLOW_32-NEXT: movl $0, 40(%eax)
; SLOW_32-NEXT: movl $0, 52(%eax)
; SLOW_32-NEXT: movl $0, 48(%eax)
; SLOW_32-NEXT: movl $0, 60(%eax)
; SLOW_32-NEXT: movl $0, 56(%eax)
; SLOW_32-NEXT: movl $0, 68(%eax)
; SLOW_32-NEXT: movl $0, 64(%eax)
; SLOW_32-NEXT: movl $0, 76(%eax)
; SLOW_32-NEXT: movl $0, 72(%eax)
; SLOW_32-NEXT: xorps %xmm0, %xmm0
; SLOW_32-NEXT: movsd %xmm0, 72(%eax)
; SLOW_32-NEXT: movsd %xmm0, 64(%eax)
; SLOW_32-NEXT: movsd %xmm0, 56(%eax)
; SLOW_32-NEXT: movsd %xmm0, 48(%eax)
; SLOW_32-NEXT: movsd %xmm0, 40(%eax)
; SLOW_32-NEXT: movsd %xmm0, 32(%eax)
; SLOW_32-NEXT: movsd %xmm0, 24(%eax)
; SLOW_32-NEXT: movsd %xmm0, 16(%eax)
; SLOW_32-NEXT: movsd %xmm0, 8(%eax)
; SLOW_32-NEXT: movsd %xmm0, (%eax)
; SLOW_32-NEXT: retl
;
; SLOW_64-LABEL: bork:
Expand Down
25 changes: 9 additions & 16 deletions llvm/test/CodeGen/X86/pr38738.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,22 +130,15 @@ define void @tryset(ptr nocapture %x) {
; X86SSE2-LABEL: tryset:
; X86SSE2: # %bb.0:
; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86SSE2-NEXT: movl $0, 4(%eax)
; X86SSE2-NEXT: movl $0, (%eax)
; X86SSE2-NEXT: movl $0, 12(%eax)
; X86SSE2-NEXT: movl $0, 8(%eax)
; X86SSE2-NEXT: movl $0, 20(%eax)
; X86SSE2-NEXT: movl $0, 16(%eax)
; X86SSE2-NEXT: movl $0, 28(%eax)
; X86SSE2-NEXT: movl $0, 24(%eax)
; X86SSE2-NEXT: movl $0, 36(%eax)
; X86SSE2-NEXT: movl $0, 32(%eax)
; X86SSE2-NEXT: movl $0, 44(%eax)
; X86SSE2-NEXT: movl $0, 40(%eax)
; X86SSE2-NEXT: movl $0, 52(%eax)
; X86SSE2-NEXT: movl $0, 48(%eax)
; X86SSE2-NEXT: movl $0, 60(%eax)
; X86SSE2-NEXT: movl $0, 56(%eax)
; X86SSE2-NEXT: xorps %xmm0, %xmm0
; X86SSE2-NEXT: movsd %xmm0, 56(%eax)
; X86SSE2-NEXT: movsd %xmm0, 48(%eax)
; X86SSE2-NEXT: movsd %xmm0, 40(%eax)
; X86SSE2-NEXT: movsd %xmm0, 32(%eax)
; X86SSE2-NEXT: movsd %xmm0, 24(%eax)
; X86SSE2-NEXT: movsd %xmm0, 16(%eax)
; X86SSE2-NEXT: movsd %xmm0, 8(%eax)
; X86SSE2-NEXT: movsd %xmm0, (%eax)
; X86SSE2-NEXT: retl
;
; X64AVX-LABEL: tryset:
Expand Down
94 changes: 54 additions & 40 deletions llvm/test/CodeGen/X86/slow-unaligned-mem.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
; Intel chips with slow unaligned memory accesses

; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE

; Intel chips with fast unaligned memory accesses

Expand All @@ -26,15 +26,15 @@

; AMD chips with slow unaligned memory accesses

; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE

; AMD chips with fast unaligned memory accesses

Expand Down Expand Up @@ -67,26 +67,40 @@
; SLOW-NOT: not a recognized processor
; FAST-NOT: not a recognized processor
define void @store_zeros(ptr %a) {
; SLOW-LABEL: store_zeros:
; SLOW: # %bb.0:
; SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NEXT: movl $0
; SLOW-NOT: movl
; SLOW-SCALAR-LABEL: store_zeros:
; SLOW-SCALAR: # %bb.0:
; SLOW-SCALAR-NEXT: movl {{[0-9]+}}(%esp), %eax
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NEXT: movl $0
; SLOW-SCALAR-NOT: movl
;
; SLOW-SSE-LABEL: store_zeros:
; SLOW-SSE: # %bb.0:
; SLOW-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SLOW-SSE-NEXT: xorps %xmm0, %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NEXT: movsd %xmm0
; SLOW-SSE-NOT: movsd
;
; FAST-SSE-LABEL: store_zeros:
; FAST-SSE: # %bb.0:
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/X86/zero-remat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ define double @foo() nounwind {
;
; CHECK-32-LABEL: foo:
; CHECK-32: # %bb.0:
; CHECK-32-NEXT: pushl $0
; CHECK-32-NEXT: pushl $0
; CHECK-32-NEXT: subl $8, %esp
; CHECK-32-NEXT: fldz
; CHECK-32-NEXT: fstpl (%esp)
; CHECK-32-NEXT: calll bar@PLT
; CHECK-32-NEXT: addl $8, %esp
; CHECK-32-NEXT: fldz
; CHECK-32-NEXT: addl $8, %esp
; CHECK-32-NEXT: retl
call void @bar(double 0.0)
ret double 0.0
Expand Down