Skip to content

DAG: Pass flags to FoldConstantArithmetic #93663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -1880,7 +1880,8 @@ class SelectionDAG {
const SDNode *N2);

SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops);
ArrayRef<SDValue> Ops,
SDNodeFlags Flags = SDNodeFlags());

/// Fold floating-point operations when all operands are constants and/or
/// undefined.
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6306,7 +6306,8 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}

SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, ArrayRef<SDValue> Ops) {
EVT VT, ArrayRef<SDValue> Ops,
SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
Expand Down Expand Up @@ -6663,7 +6664,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}

// Constant fold the scalar operands.
SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);

// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
Expand Down Expand Up @@ -7234,7 +7235,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}

// Perform trivial constant folding.
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
return SV;

// Canonicalize an UNDEF to the RHS, even over a constant.
Expand Down
14 changes: 13 additions & 1 deletion llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: b fmaxl
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: b.le .LBB18_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .LBB18_2:
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
14 changes: 13 additions & 1 deletion llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: b fminl
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-NEXT: bl __lttf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: b.ge .LBB18_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .LBB18_2:
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
ret fp128 %b
}
138 changes: 48 additions & 90 deletions llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -654,21 +654,16 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX7-LABEL: v_maximum_v2f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f16__nnan:
Expand Down Expand Up @@ -847,21 +842,16 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
Expand Down Expand Up @@ -1216,28 +1206,21 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX7-LABEL: v_maximum_v3f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f16__nnan:
Expand Down Expand Up @@ -1427,28 +1410,21 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
Expand Down Expand Up @@ -1671,35 +1647,26 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX7-LABEL: v_maximum_v4f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f16__nnan:
Expand Down Expand Up @@ -1924,35 +1891,26 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:
Expand Down
Loading
Loading