Skip to content

AMDGPU: Partially clean up canonicalized predicates in tablegen #85404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,19 @@ def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
return fp16SrcZerosHighBits(N->getOpcode());
}]>;

// Pattern leaf over (fAny srcvalue:$src) that matches only when
// SITargetLowering::isCanonicalized accepts the value. The default predicate
// body is the SelectionDAG form; GISelPredicateCode supplies the equivalent
// check expressed on a MachineInstr.
def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
  // Query the target lowering about result 0 of the matched node.
  const auto *TLI =
      static_cast<const SITargetLowering *>(getTargetLowering());
  return TLI->isCanonicalized(*CurDAG, SDValue(N, 0));
}]> {
  let GISelPredicateCode = [{
    const auto &TLI = *static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    // Operand 0 is expected to be the instruction's def; its register is the
    // value whose canonical status is being queried.
    const MachineOperand &DefOp = MI.getOperand(0);
    assert(DefOp.isDef());
    return TLI.isCanonicalized(DefOp.getReg(), MF);
  }];
}

//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
Expand Down
30 changes: 6 additions & 24 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2946,30 +2946,12 @@ def : GCNPat<

// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
// NOTE(review): this span is a diff hunk rendered without +/- markers. The hunk
// header reports 6 additions and 24 deletions, which lines up with the five
// explicit patterns below (presumably the removed side) followed by the
// foreach form (presumably the added side) -- confirm against the real diff
// before treating both forms as coexisting in the file.
let AddedComplexity = 1000 in {
// Explicit form: one pattern per value type (f16, v2f16, f32, v2f32, f64).
// Each wraps fcanonicalize in is_canonicalized_1<...> (defined outside this
// view) and selects to a COPY of $src.
def : GCNPat<
(is_canonicalized_1<fcanonicalize> f16:$src),
(COPY f16:$src)
>;

def : GCNPat<
(is_canonicalized_1<fcanonicalize> v2f16:$src),
(COPY v2f16:$src)
>;

def : GCNPat<
(is_canonicalized_1<fcanonicalize> f32:$src),
(COPY f32:$src)
>;

def : GCNPat<
(is_canonicalized_1<fcanonicalize> v2f32:$src),
(COPY v2f32:$src)
>;

def : GCNPat<
(is_canonicalized_1<fcanonicalize> f64:$src),
(COPY f64:$src)
>;
// Loop form: the same five value types, one GCNPat per iteration. Here the
// predicate is the is_canonicalized leaf applied to fcanonicalize's operand
// rather than a wrapper around the fcanonicalize node itself.
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
def : GCNPat<
(fcanonicalize (vt is_canonicalized:$src)),
(COPY vt:$src)
>;
}
}

// Prefer selecting to max when legal, but using mul is always valid.
Expand Down