Skip to content

[GlobalISel] Add constant-folding of FP binops to combiner. #65230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,9 @@ class CombinerHelper {
/// Replace an instruction with a G_FCONSTANT with value \p C.
void replaceInstWithFConstant(MachineInstr &MI, double C);

/// Replace an instruction with a G_FCONSTANT with value \p CFP.
/// \p MI must have exactly one def; it is erased after the constant is built.
void replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP);

/// Replace an instruction with a G_CONSTANT with value \p C.
void replaceInstWithConstant(MachineInstr &MI, int64_t C);

Expand Down Expand Up @@ -641,6 +644,9 @@ class CombinerHelper {
/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo);

/// Do constant FP folding when opportunities are exposed after MIR building.
/// On success, \p MatchInfo is set to a ConstantFP holding the folded value
/// and true is returned; otherwise returns false and leaves \p MatchInfo
/// untouched.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo);

/// \returns true if it is possible to narrow the width of a scalar binop
/// feeding a G_AND instruction \p MI.
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
Expand Down
12 changes: 11 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">;
def register_matchinfo: GIDefMatchData<"Register">;
def int64_matchinfo: GIDefMatchData<"int64_t">;
def apint_matchinfo : GIDefMatchData<"APInt">;
// Match data carrying a ConstantFP pointer from the match step to the apply
// step of a combine rule.
def constantfp_matchinfo : GIDefMatchData<"ConstantFP*">;
def build_fn_matchinfo :
GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
def unsigned_matchinfo: GIDefMatchData<"unsigned">;
Expand Down Expand Up @@ -946,6 +947,12 @@ def constant_fold_binop : GICombineRule<
[{ return Helper.matchConstantFoldBinOp(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;

// Constant-fold a floating-point binary operation (G_FADD, G_FSUB, G_FMUL or
// G_FDIV): when the helper's matcher succeeds, the root instruction is
// replaced with a G_FCONSTANT built from the ConstantFP stored in $matchinfo.
def constant_fold_fp_binop : GICombineRule<
(defs root:$d, constantfp_matchinfo:$matchinfo),
(match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV):$d,
[{ return Helper.matchConstantFoldFPBinOp(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithFConstant(*${d}, ${matchinfo}); }])>;

def constant_fold_cast_op : GICombineRule<
(defs root:$d, apint_matchinfo:$matchinfo),
(match (wip_match_opcode G_ZEXT, G_SEXT, G_ANYEXT):$d,
Expand Down Expand Up @@ -1197,6 +1204,9 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;

// Groups the existing binop constant-folding rule (constant_fold_binop) with
// the floating-point one (constant_fold_fp_binop) so combiners can enable
// both through a single name.
def constant_fold_binops : GICombineGroup<[constant_fold_binop,
constant_fold_fp_binop]>;

def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
Expand All @@ -1211,7 +1221,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
div_rem_to_divrem, funnel_shift_combines, commute_shift,
form_bitfield_extract, constant_fold_binop, constant_fold_cast_op, fabs_fneg_fold,
form_bitfield_extract, constant_fold_binops, constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2645,6 +2645,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
MI.eraseFromParent();
}

// Replace MI with a G_FCONSTANT that defines MI's result operand (operand 0)
// with the value held by CFP, then erase MI. MI must have exactly one def.
void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
// Point the builder at MI so the replacement constant is emitted relative to
// the instruction being replaced.
Builder.setInstr(MI);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should really just fix the combiner to always set the insert point before the apply

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.

// Reuse MI's destination operand as the def of the new G_FCONSTANT, then drop
// the now-redundant original instruction.
Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
MI.eraseFromParent();
}

void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Expand Down Expand Up @@ -4531,6 +4538,17 @@ bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
return true;
}

// Try to constant-fold the FP binary operation MI from its two source
// registers (operands 1 and 2). On success MatchInfo receives a ConstantFP,
// created in the context of MI's parent function, holding the folded value and
// true is returned. If ConstantFoldFPBinOp yields no value, MatchInfo is left
// unchanged and false is returned.
bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  if (auto Folded = ConstantFoldFPBinOp(MI.getOpcode(), LHS, RHS, MRI)) {
    auto &Ctx = MI.getMF()->getFunction().getContext();
    MatchInfo = ConstantFP::get(Ctx, *Folded);
    return true;
  }

  return false;
}

bool CombinerHelper::matchNarrowBinopFeedingAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
// Look for a binop feeding into an AND with a mask:
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def AArch64PostLegalizerCombiner
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
constant_fold_binop, identity_combines,
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax]> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,113 @@ body: |
RET_ReallyLR implicit $x0

...
---
name: fadd
legalized: true
liveins:
- { reg: '$d0' }
body: |
bb.1.entry:
liveins: $d0

; CHECK-LABEL: name: fadd
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 4.200000e+01
; CHECK-NEXT: $d0 = COPY %res(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%a:_(s64) = G_FCONSTANT double 40.0
%b:_(s64) = G_FCONSTANT double 2.0
%res:_(s64) = G_FADD %a, %b
$d0 = COPY %res(s64)
RET_ReallyLR implicit $d0

...
---
name: fsub
legalized: true
liveins:
- { reg: '$d0' }
body: |
bb.1.entry:
liveins: $d0

; CHECK-LABEL: name: fsub
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 3.800000e+01
; CHECK-NEXT: $d0 = COPY %res(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%a:_(s64) = G_FCONSTANT double 40.0
%b:_(s64) = G_FCONSTANT double 2.0
%res:_(s64) = G_FSUB %a, %b
$d0 = COPY %res(s64)
RET_ReallyLR implicit $d0

...
---
name: fmul
legalized: true
liveins:
- { reg: '$d0' }
body: |
bb.1.entry:
liveins: $d0

; CHECK-LABEL: name: fmul
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 8.000000e+01
; CHECK-NEXT: $d0 = COPY %res(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%a:_(s64) = G_FCONSTANT double 40.0
%b:_(s64) = G_FCONSTANT double 2.0
%res:_(s64) = G_FMUL %a, %b
$d0 = COPY %res(s64)
RET_ReallyLR implicit $d0

...
---
name: fdiv
legalized: true
liveins:
- { reg: '$d0' }
body: |
bb.1.entry:
liveins: $d0

; CHECK-LABEL: name: fdiv
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 2.000000e+01
; CHECK-NEXT: $d0 = COPY %res(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%a:_(s64) = G_FCONSTANT double 40.0
%b:_(s64) = G_FCONSTANT double 2.0
%res:_(s64) = G_FDIV %a, %b
$d0 = COPY %res(s64)
RET_ReallyLR implicit $d0

...
---
name: fadd32
legalized: true
liveins:
- { reg: '$s0' }
body: |
bb.1.entry:
liveins: $s0

; CHECK-LABEL: name: fadd32
; CHECK: liveins: $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %res:_(s32) = G_FCONSTANT float 4.200000e+01
; CHECK-NEXT: $s0 = COPY %res(s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%a:_(s32) = G_FCONSTANT float 40.0
%b:_(s32) = G_FCONSTANT float 2.0
%res:_(s32) = G_FADD %a, %b
$s0 = COPY %res(s32)
RET_ReallyLR implicit $s0

...
100 changes: 44 additions & 56 deletions llvm/test/CodeGen/AMDGPU/llvm.log.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5783,23 +5783,20 @@ define float @v_log_f32_0() {
; SI-GISEL-LABEL: v_log_f32_0:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, 0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -5825,26 +5822,23 @@ define float @v_log_f32_0() {
; VI-GISEL-LABEL: v_log_f32_0:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -5867,23 +5861,20 @@ define float @v_log_f32_0() {
; GFX900-GISEL-LABEL: v_log_f32_0:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, 0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -5907,21 +5898,18 @@ define float @v_log_f32_0() {
; GFX1100-GISEL-LABEL: v_log_f32_0:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
Expand Down
Loading