llvm · arsenm · Sep 7, 2023 · Sep 1, 2023 · Sep 7, 2023 · arsenm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -434,6 +434,9 @@ class CombinerHelper {
   /// Replace an instruction with a G_FCONSTANT with value \p C.
   void replaceInstWithFConstant(MachineInstr &MI, double C);
 
+  /// Replace an instruction with an G_FCONSTANT with value \p CFP.
+  void replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP);
+
   /// Replace an instruction with a G_CONSTANT with value \p C.
   void replaceInstWithConstant(MachineInstr &MI, int64_t C);
 
@@ -641,6 +644,9 @@ class CombinerHelper {
   /// Do constant folding when opportunities are exposed after MIR building.
   bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo);
 
+  /// Do constant FP folding when opportunities are exposed after MIR building.
+  bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo);
+
   /// \returns true if it is possible to narrow the width of a scalar binop
   /// feeding a G_AND instruction \p MI.
   bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -155,6 +155,7 @@ def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">;
 def register_matchinfo: GIDefMatchData<"Register">;
 def int64_matchinfo: GIDefMatchData<"int64_t">;
 def apint_matchinfo : GIDefMatchData<"APInt">;
+def constantfp_matchinfo : GIDefMatchData<"ConstantFP*">;
 def build_fn_matchinfo :
 GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
 def unsigned_matchinfo: GIDefMatchData<"unsigned">;
@@ -946,6 +947,12 @@ def constant_fold_binop : GICombineRule<
    [{ return Helper.matchConstantFoldBinOp(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
 
+def constant_fold_fp_binop : GICombineRule<
+  (defs root:$d, constantfp_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV):$d,
+   [{ return Helper.matchConstantFoldFPBinOp(*${d}, ${matchinfo}); }]),
+  (apply [{ Helper.replaceInstWithFConstant(*${d}, ${matchinfo}); }])>;
+
 def constant_fold_cast_op : GICombineRule<
   (defs root:$d, apint_matchinfo:$matchinfo),
   (match (wip_match_opcode G_ZEXT, G_SEXT, G_ANYEXT):$d,
@@ -1197,6 +1204,9 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
   combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
   combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;
 
+def constant_fold_binops : GICombineGroup<[constant_fold_binop,
+                                           constant_fold_fp_binop]>;
+
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     extract_vec_elt_combines, combines_for_extload,
     combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
@@ -1211,7 +1221,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
     div_rem_to_divrem, funnel_shift_combines, commute_shift,
-    form_bitfield_extract, constant_fold_binop, constant_fold_cast_op, fabs_fneg_fold,
+    form_bitfield_extract, constant_fold_binops, constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2645,6 +2645,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
   MI.eraseFromParent();
 }
 
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
+  assert(MI.getNumDefs() == 1 && "Expected only one def?");
+  Builder.setInstr(MI);
+  Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
+  MI.eraseFromParent();
+}
+
 void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
@@ -4531,6 +4538,17 @@ bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
   return true;
 }
 
+bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) {
+  Register Op1 = MI.getOperand(1).getReg();
+  Register Op2 = MI.getOperand(2).getReg();
+  auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
+  if (!MaybeCst)
+    return false;
+  MatchInfo =
+      ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
+  return true;
+}
+
 bool CombinerHelper::matchNarrowBinopFeedingAnd(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   // Look for a binop feeding into an AND with a mask:

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -238,7 +238,7 @@ def AArch64PostLegalizerCombiner
                         form_bitfield_extract, rotate_out_of_range,
                         icmp_to_true_false_known_bits, merge_unmerge,
                         select_combines, fold_merge_to_zext,
-                        constant_fold_binop, identity_combines,
+                        constant_fold_binops, identity_combines,
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
                         select_to_minmax]> {

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir
@@ -139,3 +139,113 @@ body:             |
     RET_ReallyLR implicit $x0
 
 ...
+---
+name:            fadd
+legalized:       true
+liveins:
+  - { reg: '$d0' }
+body:             |
+  bb.1.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fadd
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 4.200000e+01
+    ; CHECK-NEXT: $d0 = COPY %res(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %a:_(s64) = G_FCONSTANT double 40.0
+    %b:_(s64) = G_FCONSTANT double 2.0
+    %res:_(s64) = G_FADD %a, %b
+    $d0 = COPY %res(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            fsub
+legalized:       true
+liveins:
+  - { reg: '$d0' }
+body:             |
+  bb.1.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fsub
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 3.800000e+01
+    ; CHECK-NEXT: $d0 = COPY %res(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %a:_(s64) = G_FCONSTANT double 40.0
+    %b:_(s64) = G_FCONSTANT double 2.0
+    %res:_(s64) = G_FSUB %a, %b
+    $d0 = COPY %res(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            fmul
+legalized:       true
+liveins:
+  - { reg: '$d0' }
+body:             |
+  bb.1.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fmul
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 8.000000e+01
+    ; CHECK-NEXT: $d0 = COPY %res(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %a:_(s64) = G_FCONSTANT double 40.0
+    %b:_(s64) = G_FCONSTANT double 2.0
+    %res:_(s64) = G_FMUL %a, %b
+    $d0 = COPY %res(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            fdiv
+legalized:       true
+liveins:
+  - { reg: '$d0' }
+body:             |
+  bb.1.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fdiv
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 2.000000e+01
+    ; CHECK-NEXT: $d0 = COPY %res(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %a:_(s64) = G_FCONSTANT double 40.0
+    %b:_(s64) = G_FCONSTANT double 2.0
+    %res:_(s64) = G_FDIV %a, %b
+    $d0 = COPY %res(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            fadd32
+legalized:       true
+liveins:
+  - { reg: '$s0' }
+body:             |
+  bb.1.entry:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: fadd32
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %res:_(s32) = G_FCONSTANT float 4.200000e+01
+    ; CHECK-NEXT: $s0 = COPY %res(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $s0
+    %a:_(s32) = G_FCONSTANT float 40.0
+    %b:_(s32) = G_FCONSTANT float 2.0
+    %res:_(s32) = G_FADD %a, %b
+    $s0 = COPY %res(s32)
+    RET_ReallyLR implicit $s0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -5783,23 +5783,20 @@ define float @v_log_f32_0() {
 ; SI-GISEL-LABEL: v_log_f32_0:
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT:    v_log_f32_e32 v0, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
-; SI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT:    v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT:    v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x41b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT:    v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT:    v_fma_f32 v2, v0, v2, v4
+; SI-GISEL-NEXT:    v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41b17218
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5825,26 +5822,23 @@ define float @v_log_f32_0() {
 ; VI-GISEL-LABEL: v_log_f32_0:
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT:    v_log_f32_e32 v0, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT:    v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT:    v_sub_f32_e32 v3, v0, v2
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT:    v_add_f32_e32 v4, v4, v5
 ; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
-; VI-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x41b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41b17218
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5867,23 +5861,20 @@ define float @v_log_f32_0() {
 ; GFX900-GISEL-LABEL: v_log_f32_0:
 ; GFX900-GISEL:       ; %bb.0:
 ; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, 0
 ; GFX900-GISEL-NEXT:    s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
-; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x41b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT:    v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, v2, v4
+; GFX900-GISEL-NEXT:    v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41b17218
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5907,21 +5898,18 @@ define float @v_log_f32_0() {
 ; GFX1100-GISEL-LABEL: v_log_f32_0:
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, 0x4f800000, 0
-; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, 0
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, 0
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_fma_f32 v2, v0, 0x3f317217, -v1
 ; GFX1100-GISEL-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]