Skip to content

Commit cff2199

Browse files
committed
Revert "[GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (#114309)"
This reverts commit 999dfb2. I received a report that this may have increased fallbacks on AArch64.
1 parent 8c75290 commit cff2199

37 files changed

+2062
-2292
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,6 @@ class CombinerHelper {
321321
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
322322
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
323323

324-
// Transform a G_SUB with constant on the RHS to G_ADD.
325-
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
326-
327324
// Transform a G_SHL with an extended source into a narrower shift if
328325
// possible.
329326
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -335,14 +335,6 @@ def mul_to_shl : GICombineRule<
335335
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
336336
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
337337

338-
// (sub x, C) -> (add x, -C)
339-
def sub_to_add : GICombineRule<
340-
(defs root:$d, build_fn_matchinfo:$matchinfo),
341-
(match (G_CONSTANT $c, $imm),
342-
(G_SUB $d, $op1, $c):$mi,
343-
[{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
344-
(apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
345-
346338
// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
347339
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
348340
def reduce_shl_of_extend : GICombineRule<
@@ -1911,9 +1903,8 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
19111903
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
19121904
select_to_iminmax, match_selects]>;
19131905

1914-
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
1915-
add_p2i_to_ptradd, mul_by_neg_one,
1916-
idempotent_prop]>;
1906+
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
1907+
mul_by_neg_one, idempotent_prop]>;
19171908

19181909
def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
19191910
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,31 +2044,6 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
20442044
Observer.changedInstr(MI);
20452045
}
20462046

2047-
bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
2048-
BuildFnTy &MatchInfo) {
2049-
GSub &Sub = cast<GSub>(MI);
2050-
2051-
LLT Ty = MRI.getType(Sub.getReg(0));
2052-
2053-
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2054-
return false;
2055-
2056-
if (!isConstantLegalOrBeforeLegalizer(Ty))
2057-
return false;
2058-
2059-
APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2060-
2061-
MatchInfo = [=, &MI](MachineIRBuilder &B) {
2062-
auto NegCst = B.buildConstant(Ty, -Imm);
2063-
Observer.changingInstr(MI);
2064-
MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2065-
MI.getOperand(2).setReg(NegCst.getReg(0));
2066-
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
2067-
Observer.changedInstr(MI);
2068-
};
2069-
return true;
2070-
}
2071-
20722047
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
20732048
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
20742049
RegisterImmPair &MatchData) {

llvm/lib/Target/RISCV/RISCVGISel.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,15 @@ def gi_zexti32 : GIComplexOperandMatcher<s64, "selectZExtBits<32>">,
101101
def gi_zexti16 : GIComplexOperandMatcher<s32, "selectZExtBits<16>">,
102102
GIComplexPatternEquiv<zexti16>;
103103

104+
// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
105+
def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
106+
(ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
107+
108+
let Predicates = [IsRV64] in {
109+
def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
110+
(ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
111+
}
112+
104113
// Ptr type used in patterns with GlobalISelEmitter
105114
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
106115

llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,8 @@ body: |
308308
; CHECK: liveins: $w0, $w1
309309
; CHECK-NEXT: {{ $}}
310310
; CHECK-NEXT: %a:_(s64) = COPY $x0
311-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
312-
; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
311+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
312+
; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
313313
; CHECK-NEXT: $x0 = COPY %sub(s64)
314314
; CHECK-NEXT: RET_ReallyLR implicit $x0
315315
%a:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ body: |
8888
; CHECK-LABEL: name: test_combine_trunc_sub_i128
8989
; CHECK: %lhs:_(s128) = COPY $q0
9090
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
91-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
92-
; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
91+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
92+
; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
9393
; CHECK-NEXT: $w0 = COPY %small(s32)
9494
%lhs:_(s128) = COPY $q0
9595
%rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body: |
103103
bb.1:
104104
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
105105
; CHECK: %lhs:_(s128) = COPY $q0
106-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
107-
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
106+
; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
107+
; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
108108
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
109109
; CHECK-NEXT: $q0 = COPY %res(s128)
110110
; CHECK-NEXT: $w0 = COPY %small(s32)

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ body: |
9595
%11:_(s8) = G_CONSTANT i8 1
9696
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
9797
%7:_(s8) = G_SUB %2, %11
98-
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
98+
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
9999
G_BR %bb.3.exit
100100
bb.3.exit:
101101
; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
197197
%7:_(s8) = G_CONSTANT i8 1
198198
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
199199
%8:_(s8) = G_SUB %2, %7
200-
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
200+
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
201201
G_BR %bb.3.exit
202202
bb.3.exit:
203203
; CHECK: bb.3.exit:

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir

Lines changed: 2 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@ body: |
289289
; CHECK: liveins: $w0
290290
; CHECK-NEXT: {{ $}}
291291
; CHECK-NEXT: %x:_(s32) = COPY $w0
292-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
293-
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
292+
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
293+
; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
294294
; CHECK-NEXT: $w0 = COPY %op(s32)
295295
; CHECK-NEXT: RET_ReallyLR implicit $w0
296296
%x:_(s32) = COPY $w0
@@ -488,66 +488,3 @@ body: |
488488
RET_ReallyLR implicit $w0
489489
490490
...
491-
---
492-
name: sub_to_add
493-
tracksRegLiveness: true
494-
body: |
495-
bb.1.entry:
496-
liveins: $w0
497-
; CHECK-LABEL: name: sub_to_add
498-
; CHECK: liveins: $w0
499-
; CHECK-NEXT: {{ $}}
500-
; CHECK-NEXT: %x:_(s32) = COPY $w0
501-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
502-
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
503-
; CHECK-NEXT: $w0 = COPY %op(s32)
504-
; CHECK-NEXT: RET_ReallyLR implicit $w0
505-
%x:_(s32) = COPY $w0
506-
%cst:_(s32) = G_CONSTANT i32 1
507-
%op:_(s32) = G_SUB %x(s32), %cst
508-
$w0 = COPY %op(s32)
509-
RET_ReallyLR implicit $w0
510-
511-
...
512-
---
513-
name: sub_to_add_nuw
514-
tracksRegLiveness: true
515-
body: |
516-
bb.1.entry:
517-
liveins: $w0
518-
; CHECK-LABEL: name: sub_to_add_nuw
519-
; CHECK: liveins: $w0
520-
; CHECK-NEXT: {{ $}}
521-
; CHECK-NEXT: %x:_(s32) = COPY $w0
522-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
523-
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
524-
; CHECK-NEXT: $w0 = COPY %op(s32)
525-
; CHECK-NEXT: RET_ReallyLR implicit $w0
526-
%x:_(s32) = COPY $w0
527-
%cst:_(s32) = G_CONSTANT i32 1
528-
%op:_(s32) = nuw G_SUB %x(s32), %cst
529-
$w0 = COPY %op(s32)
530-
RET_ReallyLR implicit $w0
531-
532-
...
533-
---
534-
name: sub_to_add_nsw
535-
tracksRegLiveness: true
536-
body: |
537-
bb.1.entry:
538-
liveins: $w0
539-
; CHECK-LABEL: name: sub_to_add_nsw
540-
; CHECK: liveins: $w0
541-
; CHECK-NEXT: {{ $}}
542-
; CHECK-NEXT: %x:_(s32) = COPY $w0
543-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
544-
; CHECK-NEXT: %op:_(s32) = nsw G_ADD %x, [[C]]
545-
; CHECK-NEXT: $w0 = COPY %op(s32)
546-
; CHECK-NEXT: RET_ReallyLR implicit $w0
547-
%x:_(s32) = COPY $w0
548-
%cst:_(s32) = G_CONSTANT i32 1
549-
%op:_(s32) = nsw G_SUB %x(s32), %cst
550-
$w0 = COPY %op(s32)
551-
RET_ReallyLR implicit $w0
552-
553-
...

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
16691669
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
16701670
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
16711671
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
1672-
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
1672+
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
16731673
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
16741674
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
16751675
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
16921692
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
16931693
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
16941694
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1695-
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
1695+
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
16961696
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
16971697
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
16981698
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
17151715
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
17161716
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
17171717
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1718-
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
1718+
; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
17191719
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
17201720
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
17211721
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
17351735
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17361736
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
17371737
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1738-
; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
1738+
; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
17391739
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
17401740
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
17411741
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
17581758
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17591759
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
17601760
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1761-
; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
1761+
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
17621762
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
17631763
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
17641764
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
14381438
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
14391439
; SI-NEXT: v_ffbh_i32_e32 v3, 0
14401440
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
1441-
; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
1441+
; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
14421442
; SI-NEXT: v_mov_b32_e32 v1, 0
14431443
; SI-NEXT: v_min_u32_e32 v2, v3, v2
14441444
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
14561456
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
14571457
; VI-NEXT: v_ffbh_i32_e32 v3, 0
14581458
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
1459-
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
1459+
; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
14601460
; VI-NEXT: v_mov_b32_e32 v1, 0
14611461
; VI-NEXT: v_min_u32_e32 v2, v3, v2
14621462
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]

llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
41014101
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
41024102
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
41034103
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
4104-
; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
4104+
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
41054105
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
41064106
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
41074107
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -4112,9 +4112,10 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
41124112
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
41134113
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
41144114
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
4115+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
4116+
; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
41154117
; GFX11-NEXT: s_waitcnt_depctr 0xfff
4116-
; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
4117-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4118+
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
41184119
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
41194120
; GFX11-NEXT: s_setpc_b64 s[30:31]
41204121
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:

0 commit comments

Comments
 (0)