Skip to content

[GISel][CombinerHelper] Push freeze through non-poison-producing operands #90618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,9 @@ class CombinerHelper {
/// Combine insert vector element OOB.
bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);

bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI,
BuildFnTy &MatchInfo);

private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ class GenericMachineInstr : public MachineInstr {
static bool classof(const MachineInstr *MI) {
return isPreISelGenericOpcode(MI->getOpcode());
}

bool hasPoisonGeneratingFlags() const {
return getFlags() & (NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg |
FmNoNans | FmNoInfs);
}

void dropPoisonGeneratingFlags() {
clearFlags(NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans |
FmNoInfs);
assert(!hasPoisonGeneratingFlags());
}
};

/// Provides common memory operand functionality.
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/MachineInstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,12 @@ class MachineInstr
Flags &= ~((uint32_t)Flag);
}

void clearFlags(unsigned flags) {
assert(isUInt<LLVM_MI_FLAGS_BITS>(flags) &&
"flags to be cleared are out of range for the Flags field");
Flags &= ~flags;
}

/// Return true if MI is in a bundle (but not the first MI in a bundle).
///
/// A bundle looks like this before it's finalized:
Expand Down
10 changes: 9 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,13 @@ def idempotent_prop : GICombineRule<
(match (idempotent_prop_frags $dst, $src)),
(apply (GIReplaceReg $dst, $src))>;

// Convert freeze(Op(Op0, NonPoisonOps...)) to Op(freeze(Op0), NonPoisonOps...)
// when Op0 is not guaranteed non-poison
def push_freeze_to_prevent_poison_from_propagating : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_FREEZE $dst, $src):$root,
[{ return !isGuaranteedNotToBePoison(${src}.getReg(), MRI) && Helper.matchFreezeOfSingleMaybePoisonOperand(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def extending_loads : GICombineRule<
(defs root:$root, extending_load_matchdata:$matchinfo),
Expand Down Expand Up @@ -1713,7 +1720,8 @@ def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
sext_trunc, zext_trunc, combine_shuffle_concat]>;
sext_trunc, zext_trunc, combine_shuffle_concat,
push_freeze_to_prevent_poison_from_propagating]>;

// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
Expand Down
64 changes: 64 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,70 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
replaceRegWith(MRI, DstReg, SrcReg);
}

bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
MachineInstr &MI, BuildFnTy &MatchInfo) {
// Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
Register DstOp = MI.getOperand(0).getReg();
Register OrigOp = MI.getOperand(1).getReg();

if (!MRI.hasOneNonDBGUse(OrigOp))
return false;

MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
// Even if only a single operand of the PHI is not guaranteed non-poison,
// moving freeze() backwards across a PHI can cause optimization issues for
// other users of that operand.
//
// Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
// the source register is unprofitable because it makes the freeze() more
// strict than is necessary (it would affect the whole register instead of
// just the subreg being frozen).
if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
return false;

if (canCreateUndefOrPoison(OrigOp, MRI,
/*ConsiderFlagsAndMetadata=*/false))
return false;

std::optional<MachineOperand> MaybePoisonOperand;
for (MachineOperand &Operand : OrigDef->uses()) {
if (!Operand.isReg())
return false;

if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
continue;

if (!MaybePoisonOperand)
MaybePoisonOperand = Operand;
else {
// We have more than one maybe-poison operand. Moving the freeze is
// unsafe.
return false;
}
}

cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();

// Eliminate freeze if all operands are guaranteed non-poison.
if (!MaybePoisonOperand) {
MatchInfo = [=](MachineIRBuilder &B) { MRI.replaceRegWith(DstOp, OrigOp); };
return true;
}

Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);

MatchInfo = [=](MachineIRBuilder &B) mutable {
B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be necessary anymore

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new freeze has to be inserted before the use which will come before the old freeze that is being removed, so the insert point has to be moved back.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The OrigDef is the root freeze instruction. The insert point for the apply is supposed to be set to before the root instruction, so this is just setting it to what it already should be

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, OrigDef is the instruction that is being frozen. This function gets called on freeze(op(op0, ops...)), where OrigDef is op and the insert point is before the freeze but after op. We have to transform this to op(freeze(op0), ops...), so it necessarily requires moving the insert point before op.

Removing the setInsertPt call causes the following tests to fail:

Failed Tests (2):
  LLVM :: CodeGen/AArch64/GlobalISel/combine-select.mir
  LLVM :: CodeGen/AMDGPU/div_v2i128.ll

Where the first test has freezes inserted in the wrong place, for example:

        683:   
        684:  %0:_(s64) = COPY $x0 
        685:  %2:_(s64) = COPY $x2 
        686:  %c:_(s1) = G_TRUNC %0(s64) 
        687:  %f:_(s1) = G_TRUNC %11(s64) 
        688:  %11:_(s64) = G_FREEZE %2 
next:147      !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
        689:  %sel:_(s1) = G_OR %c, %f 
        690:  %ext:_(s32) = G_ANYEXT %sel(s1) 
        691:  $w0 = COPY %ext(s32) 
        692:  
        693: ... 

and the second test fails in the machine verifier:

*** Bad machine code: Virtual register defs don't dominate all uses. ***
- function:    v_sdiv_v2i128_vv
- v. register: %964

auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
replaceRegOpWith(
MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
Freeze.getReg(0));
replaceRegWith(MRI, DstOp, OrigOp);
};
return true;
}

bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
SmallVector<Register> &Ops) {
assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
Expand Down
24 changes: 21 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1745,11 +1745,20 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
UndefPoisonKind Kind) {
MachineInstr *RegDef = MRI.getVRegDef(Reg);

if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef)) {
if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
GMI->hasPoisonGeneratingFlags())
return true;
} else {
// Conservatively return true.
return true;
}

switch (RegDef->getOpcode()) {
case TargetOpcode::G_FREEZE:
return false;
default:
return true;
return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef);
}
}

Expand All @@ -1767,8 +1776,17 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
return true;
case TargetOpcode::G_IMPLICIT_DEF:
return !includesUndef(Kind);
default:
return false;
default: {
auto MOCheck = [&](const MachineOperand &MO) {
if (!MO.isReg())
return true;
return ::isGuaranteedNotToBeUndefOrPoison(MO.getReg(), MRI, Depth + 1,
Kind);
};
return !::canCreateUndefOrPoison(Reg, MRI,
/*ConsiderFlagsAndMetadata=*/true, Kind) &&
all_of(RegDef->uses(), MOCheck);
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -295,5 +295,6 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs]> {
commute_constant_to_rhs,
push_freeze_to_prevent_poison_from_propagating]> {
}
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
; CHECK-NEXT: %sel:_(s1) = G_OR %c, [[FREEZE]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
Expand All @@ -144,9 +144,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
; CHECK-NEXT: %sel:_(s1) = G_OR %c, [[FREEZE]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
Expand All @@ -172,9 +172,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d2
; CHECK-NEXT: %c:_(<2 x s1>) = G_TRUNC [[COPY]](<2 x s32>)
; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s1>) = G_FREEZE %f
; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, [[FREEZE]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[FREEZE]](<2 x s32>)
; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, %f
; CHECK-NEXT: %ext:_(<2 x s32>) = G_ANYEXT %sel(<2 x s1>)
; CHECK-NEXT: $d0 = COPY %ext(<2 x s32>)
%0:_(<2 x s32>) = COPY $d0
Expand All @@ -201,9 +201,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
Expand All @@ -229,9 +229,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
Expand All @@ -257,11 +257,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, %one
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], [[FREEZE]]
; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], %t
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
Expand All @@ -287,11 +287,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], [[FREEZE]]
; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], %f
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/pr58431.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
define i32 @f(i64 %0) {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #10
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: and x9, x0, #0xffffffff
; CHECK-NEXT: udiv x10, x9, x8
; CHECK-NEXT: msub x0, x10, x8, x9
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
Expand Down
Loading