Skip to content

Commit 999dfb2

Browse files
authored
[GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (#114309)
This matches InstCombine and DAGCombine. RISC-V only has an ADDI instruction (there is no subtract-immediate), so without this canonicalization we need additional patterns to do the conversion. Some of the AMDGPU test changes look like possible regressions. Maybe some patterns from isel aren't imported.
1 parent 00a93e6 commit 999dfb2

36 files changed

+2281
-2051
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,9 @@ class CombinerHelper {
321321
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
322322
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
323323

324+
// Transform a G_SUB with constant on the RHS to G_ADD.
325+
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
326+
324327
// Transform a G_SHL with an extended source into a narrower shift if
325328
// possible.
326329
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,14 @@ def mul_to_shl : GICombineRule<
335335
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
336336
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
337337

338+
// (sub x, C) -> (add x, -C)
339+
def sub_to_add : GICombineRule<
340+
(defs root:$d, build_fn_matchinfo:$matchinfo),
341+
(match (G_CONSTANT $c, $imm),
342+
(G_SUB $d, $op1, $c):$mi,
343+
[{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
344+
(apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
345+
338346
// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
339347
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
340348
def reduce_shl_of_extend : GICombineRule<
@@ -1903,8 +1911,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
19031911
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
19041912
select_to_iminmax, match_selects]>;
19051913

1906-
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
1907-
mul_by_neg_one, idempotent_prop]>;
1914+
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
1915+
add_p2i_to_ptradd, mul_by_neg_one,
1916+
idempotent_prop]>;
19081917

19091918
def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
19101919
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,6 +2044,31 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
20442044
Observer.changedInstr(MI);
20452045
}
20462046

2047+
/// Match (G_SUB x, C), where C is an integer constant, and set \p MatchInfo to
/// a build function that rewrites \p MI in place to (G_ADD x, -C).
///
/// \param MI        The G_SUB to rewrite; it is mutated, not erased.
/// \param MatchInfo Receives the rewrite callback when the match succeeds.
/// \returns false if G_ADD or a constant of the result type is not legal at
///          this point in the pipeline; true otherwise.
bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
                                          BuildFnTy &MatchInfo) {
  GSub &Sub = cast<GSub>(MI);

  LLT Ty = MRI.getType(Sub.getReg(0));

  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
    return false;

  if (!isConstantLegalOrBeforeLegalizer(Ty))
    return false;

  APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    auto NegCst = B.buildConstant(Ty, -Imm);
    Observer.changingInstr(MI);
    MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
    MI.getOperand(2).setReg(NegCst.getReg(0));
    // x - C never wraps unsigned exactly when x + (-C) does (for C != 0), so
    // nuw cannot be carried over to the add.
    MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
    // nsw carries over unless C is the minimum signed value: then -C == C, and
    // (sub nsw x, C) is defined for negative x while (add nsw x, C) overflows
    // for those same x.
    if (Imm.isMinSignedValue())
      MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
    Observer.changedInstr(MI);
  };
  return true;
}
2071+
20472072
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
20482073
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
20492074
RegisterImmPair &MatchData) {

llvm/lib/Target/RISCV/RISCVGISel.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,6 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
9696
def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
9797
GIComplexPatternEquiv<sh3add_uw_op>;
9898

99-
// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
100-
def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
101-
(ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
102-
103-
let Predicates = [IsRV64] in {
104-
def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
105-
(ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
106-
}
107-
10899
// Ptr type used in patterns with GlobalISelEmitter
109100
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
110101

llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,8 @@ body: |
308308
; CHECK: liveins: $w0, $w1
309309
; CHECK-NEXT: {{ $}}
310310
; CHECK-NEXT: %a:_(s64) = COPY $x0
311-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
312-
; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
311+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
312+
; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
313313
; CHECK-NEXT: $x0 = COPY %sub(s64)
314314
; CHECK-NEXT: RET_ReallyLR implicit $x0
315315
%a:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ body: |
8888
; CHECK-LABEL: name: test_combine_trunc_sub_i128
8989
; CHECK: %lhs:_(s128) = COPY $q0
9090
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
91-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
92-
; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
91+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
92+
; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
9393
; CHECK-NEXT: $w0 = COPY %small(s32)
9494
%lhs:_(s128) = COPY $q0
9595
%rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body: |
103103
bb.1:
104104
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
105105
; CHECK: %lhs:_(s128) = COPY $q0
106-
; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
107-
; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
106+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
107+
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
108108
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
109109
; CHECK-NEXT: $q0 = COPY %res(s128)
110110
; CHECK-NEXT: $w0 = COPY %small(s32)

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ body: |
9595
%11:_(s8) = G_CONSTANT i8 1
9696
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
9797
%7:_(s8) = G_SUB %2, %11
98-
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
98+
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
9999
G_BR %bb.3.exit
100100
bb.3.exit:
101101
; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
197197
%7:_(s8) = G_CONSTANT i8 1
198198
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
199199
%8:_(s8) = G_SUB %2, %7
200-
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
200+
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
201201
G_BR %bb.3.exit
202202
bb.3.exit:
203203
; CHECK: bb.3.exit:

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@ body: |
289289
; CHECK: liveins: $w0
290290
; CHECK-NEXT: {{ $}}
291291
; CHECK-NEXT: %x:_(s32) = COPY $w0
292-
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
293-
; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
292+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
293+
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
294294
; CHECK-NEXT: $w0 = COPY %op(s32)
295295
; CHECK-NEXT: RET_ReallyLR implicit $w0
296296
%x:_(s32) = COPY $w0
@@ -488,3 +488,66 @@ body: |
488488
RET_ReallyLR implicit $w0
489489
490490
...
491+
---
492+
name: sub_to_add
493+
tracksRegLiveness: true
494+
body: |
495+
bb.1.entry:
496+
liveins: $w0
497+
; CHECK-LABEL: name: sub_to_add
498+
; CHECK: liveins: $w0
499+
; CHECK-NEXT: {{ $}}
500+
; CHECK-NEXT: %x:_(s32) = COPY $w0
501+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
502+
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
503+
; CHECK-NEXT: $w0 = COPY %op(s32)
504+
; CHECK-NEXT: RET_ReallyLR implicit $w0
505+
%x:_(s32) = COPY $w0
506+
%cst:_(s32) = G_CONSTANT i32 1
507+
%op:_(s32) = G_SUB %x(s32), %cst
508+
$w0 = COPY %op(s32)
509+
RET_ReallyLR implicit $w0
510+
511+
...
512+
---
513+
name: sub_to_add_nuw
514+
tracksRegLiveness: true
515+
body: |
516+
bb.1.entry:
517+
liveins: $w0
518+
; CHECK-LABEL: name: sub_to_add_nuw
519+
; CHECK: liveins: $w0
520+
; CHECK-NEXT: {{ $}}
521+
; CHECK-NEXT: %x:_(s32) = COPY $w0
522+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
523+
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
524+
; CHECK-NEXT: $w0 = COPY %op(s32)
525+
; CHECK-NEXT: RET_ReallyLR implicit $w0
526+
%x:_(s32) = COPY $w0
527+
%cst:_(s32) = G_CONSTANT i32 1
528+
%op:_(s32) = nuw G_SUB %x(s32), %cst
529+
$w0 = COPY %op(s32)
530+
RET_ReallyLR implicit $w0
531+
532+
...
533+
---
534+
name: sub_to_add_nsw
535+
tracksRegLiveness: true
536+
body: |
537+
bb.1.entry:
538+
liveins: $w0
539+
; CHECK-LABEL: name: sub_to_add_nsw
540+
; CHECK: liveins: $w0
541+
; CHECK-NEXT: {{ $}}
542+
; CHECK-NEXT: %x:_(s32) = COPY $w0
543+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
544+
; CHECK-NEXT: %op:_(s32) = nsw G_ADD %x, [[C]]
545+
; CHECK-NEXT: $w0 = COPY %op(s32)
546+
; CHECK-NEXT: RET_ReallyLR implicit $w0
547+
%x:_(s32) = COPY $w0
548+
%cst:_(s32) = G_CONSTANT i32 1
549+
%op:_(s32) = nsw G_SUB %x(s32), %cst
550+
$w0 = COPY %op(s32)
551+
RET_ReallyLR implicit $w0
552+
553+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
16691669
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
16701670
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
16711671
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
1672-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
1672+
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
16731673
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
16741674
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
16751675
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
16921692
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
16931693
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
16941694
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1695-
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
1695+
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
16961696
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
16971697
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
16981698
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
17151715
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
17161716
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
17171717
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1718-
; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
1718+
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
17191719
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
17201720
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
17211721
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
17351735
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17361736
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
17371737
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1738-
; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
1738+
; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
17391739
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
17401740
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
17411741
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
17581758
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17591759
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
17601760
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1761-
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
1761+
; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
17621762
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
17631763
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
17641764
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
14381438
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
14391439
; SI-NEXT: v_ffbh_i32_e32 v3, 0
14401440
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
1441-
; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
1441+
; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
14421442
; SI-NEXT: v_mov_b32_e32 v1, 0
14431443
; SI-NEXT: v_min_u32_e32 v2, v3, v2
14441444
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
14561456
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
14571457
; VI-NEXT: v_ffbh_i32_e32 v3, 0
14581458
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
1459-
; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
1459+
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
14601460
; VI-NEXT: v_mov_b32_e32 v1, 0
14611461
; VI-NEXT: v_min_u32_e32 v2, v3, v2
14621462
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]

llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
41014101
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
41024102
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
41034103
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
4104-
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
4104+
; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
41054105
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
41064106
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
41074107
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
41124112
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
41134113
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
41144114
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
4115-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
4116-
; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
41174115
; GFX11-NEXT: s_waitcnt_depctr 0xfff
4118-
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
4116+
; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
4117+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
41194118
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
41204119
; GFX11-NEXT: s_setpc_b64 s[30:31]
41214120
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:

0 commit comments

Comments
 (0)