Skip to content

Commit 1cc9f62

Browse files
authored
[GlobalISel] Add constant-folding of FP binops to combiner. (#65230)
1 parent e592c2d commit 1cc9f62

File tree

7 files changed

+234
-114
lines changed

7 files changed

+234
-114
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,9 @@ class CombinerHelper {
436436
/// Replace an instruction with a G_FCONSTANT with value \p C.
437437
void replaceInstWithFConstant(MachineInstr &MI, double C);
438438

439+
/// Replace an instruction with a G_FCONSTANT with value \p CFP.
440+
void replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP);
441+
439442
/// Replace an instruction with a G_CONSTANT with value \p C.
440443
void replaceInstWithConstant(MachineInstr &MI, int64_t C);
441444

@@ -651,6 +654,9 @@ class CombinerHelper {
651654
/// Do constant folding when opportunities are exposed after MIR building.
652655
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo);
653656

657+
/// Do constant FP folding when opportunities are exposed after MIR building.
658+
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo);
659+
654660
/// \returns true if it is possible to narrow the width of a scalar binop
655661
/// feeding a G_AND instruction \p MI.
656662
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">;
155155
def register_matchinfo: GIDefMatchData<"Register">;
156156
def int64_matchinfo: GIDefMatchData<"int64_t">;
157157
def apint_matchinfo : GIDefMatchData<"APInt">;
158+
def constantfp_matchinfo : GIDefMatchData<"ConstantFP*">;
158159
def build_fn_matchinfo :
159160
GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
160161
def unsigned_matchinfo: GIDefMatchData<"unsigned">;
@@ -978,6 +979,12 @@ def constant_fold_binop : GICombineRule<
978979
[{ return Helper.matchConstantFoldBinOp(*${d}, ${matchinfo}); }]),
979980
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
980981

982+
def constant_fold_fp_binop : GICombineRule<
983+
(defs root:$d, constantfp_matchinfo:$matchinfo),
984+
(match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV):$d,
985+
[{ return Helper.matchConstantFoldFPBinOp(*${d}, ${matchinfo}); }]),
986+
(apply [{ Helper.replaceInstWithFConstant(*${d}, ${matchinfo}); }])>;
987+
981988
def constant_fold_cast_op : GICombineRule<
982989
(defs root:$d, apint_matchinfo:$matchinfo),
983990
(match (wip_match_opcode G_ZEXT, G_SEXT, G_ANYEXT):$d,
@@ -1229,6 +1236,9 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
12291236
combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
12301237
combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;
12311238

1239+
def constant_fold_binops : GICombineGroup<[constant_fold_binop,
1240+
constant_fold_fp_binop]>;
1241+
12321242
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
12331243
extract_vec_elt_combines, combines_for_extload,
12341244
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
@@ -1243,7 +1253,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
12431253
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
12441254
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
12451255
div_rem_to_divrem, funnel_shift_combines, commute_shift,
1246-
form_bitfield_extract, constant_fold_binop, constant_fold_cast_op, fabs_fneg_fold,
1256+
form_bitfield_extract, constant_fold_binops, constant_fold_cast_op, fabs_fneg_fold,
12471257
intdiv_combines, mulh_combines, redundant_neg_operands,
12481258
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
12491259
sub_add_reg, select_to_minmax, redundant_binop_in_equality,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2717,6 +2717,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
27172717
MI.eraseFromParent();
27182718
}
27192719

2720+
void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
2721+
assert(MI.getNumDefs() == 1 && "Expected only one def?");
2722+
Builder.setInstr(MI);
2723+
Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2724+
MI.eraseFromParent();
2725+
}
2726+
27202727
void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
27212728
assert(MI.getNumDefs() == 1 && "Expected only one def?");
27222729
Builder.setInstr(MI);
@@ -4603,6 +4610,17 @@ bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
46034610
return true;
46044611
}
46054612

4613+
bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) {
4614+
Register Op1 = MI.getOperand(1).getReg();
4615+
Register Op2 = MI.getOperand(2).getReg();
4616+
auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4617+
if (!MaybeCst)
4618+
return false;
4619+
MatchInfo =
4620+
ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4621+
return true;
4622+
}
4623+
46064624
bool CombinerHelper::matchNarrowBinopFeedingAnd(
46074625
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
46084626
// Look for a binop feeding into an AND with a mask:

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def AArch64PostLegalizerCombiner
239239
form_bitfield_extract, rotate_out_of_range,
240240
icmp_to_true_false_known_bits, merge_unmerge,
241241
select_combines, fold_merge_to_zext,
242-
constant_fold_binop, identity_combines,
242+
constant_fold_binops, identity_combines,
243243
ptr_add_immed_chain, overlapping_and,
244244
split_store_zero_128, undef_combines,
245245
select_to_minmax]> {

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,113 @@ body: |
139139
RET_ReallyLR implicit $x0
140140
141141
...
142+
---
143+
name: fadd
144+
legalized: true
145+
liveins:
146+
- { reg: '$d0' }
147+
body: |
148+
bb.1.entry:
149+
liveins: $d0
150+
151+
; CHECK-LABEL: name: fadd
152+
; CHECK: liveins: $d0
153+
; CHECK-NEXT: {{ $}}
154+
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 4.200000e+01
155+
; CHECK-NEXT: $d0 = COPY %res(s64)
156+
; CHECK-NEXT: RET_ReallyLR implicit $d0
157+
%a:_(s64) = G_FCONSTANT double 40.0
158+
%b:_(s64) = G_FCONSTANT double 2.0
159+
%res:_(s64) = G_FADD %a, %b
160+
$d0 = COPY %res(s64)
161+
RET_ReallyLR implicit $d0
162+
163+
...
164+
---
165+
name: fsub
166+
legalized: true
167+
liveins:
168+
- { reg: '$d0' }
169+
body: |
170+
bb.1.entry:
171+
liveins: $d0
172+
173+
; CHECK-LABEL: name: fsub
174+
; CHECK: liveins: $d0
175+
; CHECK-NEXT: {{ $}}
176+
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 3.800000e+01
177+
; CHECK-NEXT: $d0 = COPY %res(s64)
178+
; CHECK-NEXT: RET_ReallyLR implicit $d0
179+
%a:_(s64) = G_FCONSTANT double 40.0
180+
%b:_(s64) = G_FCONSTANT double 2.0
181+
%res:_(s64) = G_FSUB %a, %b
182+
$d0 = COPY %res(s64)
183+
RET_ReallyLR implicit $d0
184+
185+
...
186+
---
187+
name: fmul
188+
legalized: true
189+
liveins:
190+
- { reg: '$d0' }
191+
body: |
192+
bb.1.entry:
193+
liveins: $d0
194+
195+
; CHECK-LABEL: name: fmul
196+
; CHECK: liveins: $d0
197+
; CHECK-NEXT: {{ $}}
198+
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 8.000000e+01
199+
; CHECK-NEXT: $d0 = COPY %res(s64)
200+
; CHECK-NEXT: RET_ReallyLR implicit $d0
201+
%a:_(s64) = G_FCONSTANT double 40.0
202+
%b:_(s64) = G_FCONSTANT double 2.0
203+
%res:_(s64) = G_FMUL %a, %b
204+
$d0 = COPY %res(s64)
205+
RET_ReallyLR implicit $d0
206+
207+
...
208+
---
209+
name: fdiv
210+
legalized: true
211+
liveins:
212+
- { reg: '$d0' }
213+
body: |
214+
bb.1.entry:
215+
liveins: $d0
216+
217+
; CHECK-LABEL: name: fdiv
218+
; CHECK: liveins: $d0
219+
; CHECK-NEXT: {{ $}}
220+
; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 2.000000e+01
221+
; CHECK-NEXT: $d0 = COPY %res(s64)
222+
; CHECK-NEXT: RET_ReallyLR implicit $d0
223+
%a:_(s64) = G_FCONSTANT double 40.0
224+
%b:_(s64) = G_FCONSTANT double 2.0
225+
%res:_(s64) = G_FDIV %a, %b
226+
$d0 = COPY %res(s64)
227+
RET_ReallyLR implicit $d0
228+
229+
...
230+
---
231+
name: fadd32
232+
legalized: true
233+
liveins:
234+
- { reg: '$s0' }
235+
body: |
236+
bb.1.entry:
237+
liveins: $s0
238+
239+
; CHECK-LABEL: name: fadd32
240+
; CHECK: liveins: $s0
241+
; CHECK-NEXT: {{ $}}
242+
; CHECK-NEXT: %res:_(s32) = G_FCONSTANT float 4.200000e+01
243+
; CHECK-NEXT: $s0 = COPY %res(s32)
244+
; CHECK-NEXT: RET_ReallyLR implicit $s0
245+
%a:_(s32) = G_FCONSTANT float 40.0
246+
%b:_(s32) = G_FCONSTANT float 2.0
247+
%res:_(s32) = G_FADD %a, %b
248+
$s0 = COPY %res(s32)
249+
RET_ReallyLR implicit $s0
250+
251+
...

llvm/test/CodeGen/AMDGPU/llvm.log.ll

Lines changed: 44 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -5783,23 +5783,20 @@ define float @v_log_f32_0() {
57835783
; SI-GISEL-LABEL: v_log_f32_0:
57845784
; SI-GISEL: ; %bb.0:
57855785
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5786-
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5787-
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5788-
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5789-
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
5790-
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5791-
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5786+
; SI-GISEL-NEXT: v_log_f32_e32 v0, 0
57925787
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5793-
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5794-
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5795-
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5796-
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5797-
; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5798-
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5799-
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5800-
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5801-
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5802-
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5788+
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
5789+
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5790+
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5791+
; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5792+
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
5793+
; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5794+
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5795+
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5796+
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5797+
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5798+
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5799+
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
58035800
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
58045801
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
58055802
;
@@ -5825,26 +5822,23 @@ define float @v_log_f32_0() {
58255822
; VI-GISEL-LABEL: v_log_f32_0:
58265823
; VI-GISEL: ; %bb.0:
58275824
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5828-
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5829-
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5830-
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5831-
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
5832-
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5833-
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
5834-
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5835-
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5836-
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
5825+
; VI-GISEL-NEXT: v_log_f32_e32 v0, 0
5826+
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5827+
; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
5828+
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
58375829
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
5830+
; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
5831+
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5832+
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
58385833
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
58395834
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
58405835
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
5841-
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5842-
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5843-
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5844-
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5845-
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5846-
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5847-
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5836+
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5837+
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5838+
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5839+
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5840+
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5841+
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
58485842
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
58495843
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
58505844
;
@@ -5867,23 +5861,20 @@ define float @v_log_f32_0() {
58675861
; GFX900-GISEL-LABEL: v_log_f32_0:
58685862
; GFX900-GISEL: ; %bb.0:
58695863
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5870-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5871-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5872-
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5873-
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
5874-
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5875-
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5864+
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, 0
58765865
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5877-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5878-
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5879-
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5880-
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5881-
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5882-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5883-
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5884-
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5885-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5886-
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5866+
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
5867+
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5868+
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5869+
; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5870+
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
5871+
; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5872+
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5873+
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5874+
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5875+
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5876+
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5877+
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
58875878
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
58885879
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
58895880
;
@@ -5907,21 +5898,18 @@ define float @v_log_f32_0() {
59075898
; GFX1100-GISEL-LABEL: v_log_f32_0:
59085899
; GFX1100-GISEL: ; %bb.0:
59095900
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5910-
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
5911-
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
5912-
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5913-
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
5914-
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5901+
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0
5902+
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 0
59155903
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
59165904
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5917-
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
5905+
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
59185906
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
59195907
; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
59205908
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
59215909
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
59225910
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5923-
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
5924-
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5911+
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
5912+
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
59255913
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
59265914
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
59275915
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)