Skip to content

Commit f8d1905

Browse files
author
Thorsten Schütt
authored
[GlobalISel] Combine [S,U]SUBO (#116489)
We import the llvm.ssub.with.overflow.* intrinsics as G_SSUBO, but the Legalizer also builds these opcodes while legalizing other opcodes; see narrowScalarAddSub.
1 parent 0c42168 commit f8d1905

File tree

10 files changed

+932
-727
lines changed

10 files changed

+932
-727
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,9 @@ class CombinerHelper {
939939
// merge_values(_, zero) -> zext
940940
bool matchMergeXAndZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
941941

942+
// (G_SSUBO/G_USUBO x, y) -> (G_SUB x, y) + constant carry-out, when known bits decide the overflow
943+
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo);
944+
942945
private:
943946
/// Checks for legality of an indexed variant of \p LdSt.
944947
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,23 @@ class GAddCarryOut : public GBinOpCarryOut {
486486
}
487487
};
488488

489+
/// Represents overflowing sub operations, i.e. a subtraction that also
/// defines a carry-out (overflow) value.
490+
/// G_USUBO, G_SSUBO
491+
class GSubCarryOut : public GBinOpCarryOut {
492+
public:
493+
/// Returns true when this instruction is G_SSUBO. Given classof() below,
/// a false result therefore means G_USUBO.
bool isSigned() const { return getOpcode() == TargetOpcode::G_SSUBO; }
494+
495+
/// Type test: accepts exactly the two opcodes G_USUBO and G_SSUBO.
/// This is the hook that LLVM-style casts such as cast<GSubCarryOut>(&MI)
/// rely on.
static bool classof(const MachineInstr *MI) {
496+
switch (MI->getOpcode()) {
497+
case TargetOpcode::G_USUBO:
498+
case TargetOpcode::G_SSUBO:
499+
return true;
500+
default:
501+
return false;
502+
}
503+
}
504+
};
505+
489506
/// Represents overflowing add/sub operations that also consume a carry-in.
490507
/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
491508
class GAddSubCarryInOut : public GAddSubCarryOut {

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,12 @@ def match_addos : GICombineRule<
13851385
[{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
13861386
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
13871387

1388+
// Combine rule rooted at G_SSUBO / G_USUBO. The match step delegates to
// CombinerHelper::matchSuboCarryOut, which fills in a build function; the
// apply step runs that build function via applyBuildFn.
// NOTE(review): despite the "no_overflow" suffix, the helper appears to fold
// the always-overflows case as well -- confirm against matchSuboCarryOut.
def match_subo_no_overflow : GICombineRule<
1389+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1390+
(match (wip_match_opcode G_SSUBO, G_USUBO):$root,
1391+
[{ return Helper.matchSuboCarryOut(*${root}, ${matchinfo}); }]),
1392+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
1393+
13881394
def match_extract_of_element_undef_vector: GICombineRule <
13891395
(defs root:$root),
13901396
(match (G_IMPLICIT_DEF $vector),
@@ -1901,6 +1907,12 @@ def cmp_combines: GICombineGroup<[
19011907
redundant_binop_in_equality
19021908
]>;
19031909

1910+
1911+
// Groups the combines for overflow-producing arithmetic: the add rule
// (match_addos) and the sub rule (match_subo_no_overflow), so that a combiner
// can pull in both by naming this one group.
def overflow_combines: GICombineGroup<[
1912+
match_addos,
1913+
match_subo_no_overflow
1914+
]>;
1915+
19041916
// FIXME: These should use the custom predicate feature once it lands.
19051917
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
19061918
undef_to_negative_one,
@@ -1984,9 +1996,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
19841996
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
19851997
sub_add_reg, select_to_minmax,
19861998
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
1987-
combine_concat_vector, match_addos,
1999+
combine_concat_vector,
19882000
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
1989-
combine_use_vector_truncate, merge_combines]>;
2001+
combine_use_vector_truncate, merge_combines, overflow_combines]>;
19902002

19912003
// A combine group used for prelegalizer combiners at -O0. The combines in
19922004
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7790,3 +7790,78 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
77907790

77917791
return true;
77927792
}
7793+
7794+
/// Try to fold a G_SSUBO / G_USUBO whose overflow outcome can be decided
/// from the known bits of its operands.
///
/// \param MI        The instruction to inspect; it is cast to GSubCarryOut,
///                  so it must be a G_SSUBO or G_USUBO.
/// \param MatchInfo On success, set to a build function that emits a G_SUB
///                  into the original result register and a constant into
///                  the original carry-out register.
/// \returns true if a replacement was recorded in \p MatchInfo; false if the
///          required operations are not legal or the overflow outcome is
///          not known (MayOverflow).
bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
7795+
BuildFnTy &MatchInfo) {
7796+
const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
7797+
7798+
// Def 0 is the subtraction result; the carry-out is the overflow flag.
Register Dst = Subo->getReg(0);
7799+
Register LHS = Subo->getLHSReg();
7800+
Register RHS = Subo->getRHSReg();
7801+
Register Carry = Subo->getCarryOutReg();
7802+
LLT DstTy = MRI.getType(Dst);
7803+
LLT CarryTy = MRI.getType(Carry);
7804+
7805+
// Check legality before known bits: every rewrite below emits a G_SUB on
// DstTy and a constant of CarryTy, so give up early if either cannot be
// built. (Presumably this also avoids a known-bits query that would be
// wasted -- TODO confirm.)
7806+
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
7807+
!isConstantLegalOrBeforeLegalizer(CarryTy))
7808+
return false;
7809+
7810+
// Turn the known bits of each operand into a value range, interpreted as
// signed or unsigned to match the opcode being folded.
ConstantRange KBLHS =
7811+
ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
7812+
/* IsSigned=*/Subo->isSigned());
7813+
ConstantRange KBRHS =
7814+
ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
7815+
/* IsSigned=*/Subo->isSigned());
7816+
7817+
if (Subo->isSigned()) {
7818+
// G_SSUBO
7819+
switch (KBLHS.signedSubMayOverflow(KBRHS)) {
7820+
case ConstantRange::OverflowResult::MayOverflow:
7821+
// Outcome unknown: keep the G_SSUBO.
return false;
7822+
case ConstantRange::OverflowResult::NeverOverflows: {
7823+
// Never overflows: plain sub tagged no-signed-wrap, carry-out is 0.
MatchInfo = [=](MachineIRBuilder &B) {
7824+
B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7825+
B.buildConstant(Carry, 0);
7826+
};
7827+
return true;
7828+
}
7829+
case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7830+
case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7831+
// Always overflows: plain sub without wrap flags, and the carry-out
// becomes the "true" value returned by getICmpTrueVal for this
// scalar/vector carry type.
MatchInfo = [=](MachineIRBuilder &B) {
7832+
B.buildSub(Dst, LHS, RHS);
7833+
B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
7834+
/*isVector=*/CarryTy.isVector(),
7835+
/*isFP=*/false));
7836+
};
7837+
return true;
7838+
}
7839+
}
7840+
// Fallback for any overflow result not handled by the switch above.
return false;
7841+
}
7842+
7843+
// G_USUBO
// Same structure as the signed arm, but using the unsigned overflow query
// and the no-unsigned-wrap flag.
7844+
switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
7845+
case ConstantRange::OverflowResult::MayOverflow:
7846+
// Outcome unknown: keep the G_USUBO.
return false;
7847+
case ConstantRange::OverflowResult::NeverOverflows: {
7848+
// Never overflows: plain sub tagged no-unsigned-wrap, carry-out is 0.
MatchInfo = [=](MachineIRBuilder &B) {
7849+
B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7850+
B.buildConstant(Carry, 0);
7851+
};
7852+
return true;
7853+
}
7854+
case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7855+
case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7856+
// Always overflows: plain sub without wrap flags, carry-out is the
// "true" value returned by getICmpTrueVal.
MatchInfo = [=](MachineIRBuilder &B) {
7857+
B.buildSub(Dst, LHS, RHS);
7858+
B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
7859+
/*isVector=*/CarryTy.isVector(),
7860+
/*isFP=*/false));
7861+
};
7862+
return true;
7863+
}
7864+
}
7865+
7866+
// Fallback for any overflow result not handled by the switch above.
return false;
7867+
}

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -322,13 +322,13 @@ def AArch64PostLegalizerCombiner
322322
extractvecelt_pairwise_add, redundant_or,
323323
mul_const, redundant_sext_inreg,
324324
form_bitfield_extract, rotate_out_of_range,
325-
icmp_to_true_false_known_bits,
326-
select_combines, fold_merge_to_zext,
325+
icmp_to_true_false_known_bits, overflow_combines,
326+
select_combines, fold_merge_to_zext, merge_combines,
327327
constant_fold_binops, identity_combines,
328328
ptr_add_immed_chain, overlapping_and,
329329
split_store_zero_128, undef_combines,
330330
select_to_minmax, or_to_bsp, combine_concat_vector,
331-
commute_constant_to_rhs, merge_combines,
331+
commute_constant_to_rhs,
332332
push_freeze_to_prevent_poison_from_propagating,
333333
combine_mul_cmlt, combine_use_vector_truncate]> {
334334
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,104 @@ body: |
176176
$q1 = COPY %o_wide
177177
RET_ReallyLR implicit $w0
178178
...
179+
---
180+
name: sub_may
181+
body: |
182+
bb.0:
183+
liveins: $w0, $w1
184+
; CHECK-LABEL: name: sub_may
185+
; CHECK: liveins: $w0, $w1
186+
; CHECK-NEXT: {{ $}}
187+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
188+
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
189+
; CHECK-NEXT: %sub:_(s32), %o:_(s1) = G_SSUBO [[COPY]], %const
190+
; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s1)
191+
; CHECK-NEXT: $w0 = COPY %sub(s32)
192+
; CHECK-NEXT: $w1 = COPY %o_wide(s32)
193+
; CHECK-NEXT: RET_ReallyLR implicit $w0
194+
%0:_(s32) = COPY $w0
195+
%const:_(s32) = G_CONSTANT i32 512
196+
%sub:_(s32), %o:_(s1) = G_SSUBO %0, %const
197+
%o_wide:_(s32) = G_ZEXT %o(s1)
198+
$w0 = COPY %sub(s32)
199+
$w1 = COPY %o_wide
200+
RET_ReallyLR implicit $w0
201+
...
202+
---
203+
name: usub_may
204+
body: |
205+
bb.0:
206+
liveins: $w0, $w1
207+
; CHECK-LABEL: name: usub_may
208+
; CHECK: liveins: $w0, $w1
209+
; CHECK-NEXT: {{ $}}
210+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
211+
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
212+
; CHECK-NEXT: %sub:_(s32), %o:_(s1) = G_USUBO [[COPY]], %const
213+
; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s1)
214+
; CHECK-NEXT: $w0 = COPY %sub(s32)
215+
; CHECK-NEXT: $w1 = COPY %o_wide(s32)
216+
; CHECK-NEXT: RET_ReallyLR implicit $w0
217+
%0:_(s32) = COPY $w0
218+
%const:_(s32) = G_CONSTANT i32 512
219+
%sub:_(s32), %o:_(s1) = G_USUBO %0, %const
220+
%o_wide:_(s32) = G_ZEXT %o(s1)
221+
$w0 = COPY %sub(s32)
222+
$w1 = COPY %o_wide
223+
RET_ReallyLR implicit $w0
224+
...
225+
---
226+
name: usub_may_carry_s11
227+
body: |
228+
bb.0:
229+
liveins: $w0, $w1
230+
; CHECK-LABEL: name: usub_may_carry_s11
231+
; CHECK: liveins: $w0, $w1
232+
; CHECK-NEXT: {{ $}}
233+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
234+
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
235+
; CHECK-NEXT: %sub:_(s32), %o:_(s11) = G_USUBO [[COPY]], %const
236+
; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s11)
237+
; CHECK-NEXT: $w0 = COPY %sub(s32)
238+
; CHECK-NEXT: $w1 = COPY %o_wide(s32)
239+
; CHECK-NEXT: RET_ReallyLR implicit $w0
240+
%0:_(s32) = COPY $w0
241+
%const:_(s32) = G_CONSTANT i32 512
242+
%sub:_(s32), %o:_(s11) = G_USUBO %0, %const
243+
%o_wide:_(s32) = G_ZEXT %o(s11)
244+
$w0 = COPY %sub(s32)
245+
$w1 = COPY %o_wide
246+
RET_ReallyLR implicit $w0
247+
...
248+
---
249+
name: usub_may_carry_s11_vector
250+
body: |
251+
bb.0:
252+
liveins: $w0, $w1
253+
; CHECK-LABEL: name: usub_may_carry_s11_vector
254+
; CHECK: liveins: $w0, $w1
255+
; CHECK-NEXT: {{ $}}
256+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
257+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
258+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0
259+
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
260+
; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
261+
; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), %const(s32)
262+
; CHECK-NEXT: %sub:_(<4 x s32>), %o:_(<4 x s11>) = G_USUBO %bv, %bv1
263+
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s11>)
264+
; CHECK-NEXT: $q0 = COPY %sub(<4 x s32>)
265+
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
266+
; CHECK-NEXT: RET_ReallyLR implicit $w0
267+
%0:_(s32) = COPY $w0
268+
%1:_(s32) = COPY $w0
269+
%2:_(s32) = COPY $w0
270+
%3:_(s32) = COPY $w0
271+
%const:_(s32) = G_CONSTANT i32 512
272+
%bv:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %0(s32), %1(s32), %2(s32)
273+
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %const(s32)
274+
%sub:_(<4 x s32>), %o:_(<4 x s11>) = G_USUBO %bv, %bv1
275+
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s11>)
276+
$q0 = COPY %sub(<4 x s32>)
277+
$q1 = COPY %o_wide
278+
RET_ReallyLR implicit $w0
279+
...

llvm/test/CodeGen/AArch64/popcount.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,9 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
113113
;
114114
; GISEL-LABEL: popcount256:
115115
; GISEL: // %bb.0: // %Entry
116-
; GISEL-NEXT: ldp x8, x9, [x0, #16]
116+
; GISEL-NEXT: ldp x8, x9, [x0]
117117
; GISEL-NEXT: mov v0.d[0], x8
118-
; GISEL-NEXT: ldp x8, x10, [x0]
118+
; GISEL-NEXT: ldp x8, x10, [x0, #16]
119119
; GISEL-NEXT: mov v1.d[0], x8
120120
; GISEL-NEXT: mov v0.d[1], x9
121121
; GISEL-NEXT: mov v1.d[1], x10

0 commit comments

Comments
 (0)