[GlobalISel] convergence control tokens and intrinsics #67006

Merged: 3 commits, Mar 18, 2024
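
This patch teaches GlobalISel to translate convergence control tokens. In LLVM IR these tokens are produced by the intrinsics llvm.experimental.convergence.entry, .anchor, and .loop, and consumed through the "convergencectrl" operand bundle on convergent calls. A minimal input, patterned on the basic_call test below (@foo stands in for any convergent callee):

define i32 @example(i32 %src) convergent {
  %t = call token @llvm.experimental.convergence.entry()
  %r = call i32 @foo(i32 %src) [ "convergencectrl"(token %t) ]
  ret i32 %r
}

declare i32 @foo(i32) convergent

Each token value is mapped to a single virtual register of the LLT::token() type, and that register is threaded through call lowering as an implicit use on the lowered call, as the changes below show.
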
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -117,6 +117,9 @@ class CallLowering
/// vreg that the swifterror should be copied into after the call.
Register SwiftErrorVReg;

/// Valid if the call is a controlled convergent operation.
Register ConvergenceCtrlToken;

/// Original IR callsite corresponding to this call, if available.
const CallBase *CB = nullptr;

@@ -584,6 +587,7 @@ class CallLowering {
bool lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &Call,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
Register ConvergenceCtrlToken,
std::function<unsigned()> GetCalleeReg) const;

/// Targets that want to use big-endian can enable it with
21 changes: 21 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -579,6 +579,10 @@ class IRTranslator : public MachineFunctionPass {
return false;
}

bool translateConvergenceControlIntrinsic(const CallInst &CI,
Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder);

/// @}

// Builder for machine instruction a la IRBuilder.
@@ -697,6 +701,23 @@
return Regs[0];
}

Register getOrCreateConvergenceTokenVReg(const Value &Token) {
assert(Token.getType()->isTokenTy());
auto &Regs = *VMap.getVRegs(Token);
if (!Regs.empty()) {
assert(Regs.size() == 1 &&
"Expected a single register for convergence tokens.");
return Regs[0];
}

auto Reg = MRI->createGenericVirtualRegister(LLT::token());
Regs.push_back(Reg);
auto &Offsets = *VMap.getOffsets(Token);
if (Offsets.empty())
Offsets.push_back(0);
return Reg;
}

/// Allocate some vregs and offsets in the VMap. Then populate just the
/// offsets while leaving the vregs empty.
ValueToVRegInfo::VRegListT &allocateVRegs(const Value &Val);
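
A token carries no data bits, so unlike getOrCreateVRegs above this helper never splits the value: each token gets exactly one generic virtual register of LLT::token() type with a single zero offset. A sketch of the intended call-site pattern, mirroring the translateCallBase change in IRTranslator.cpp below:

// Sketch only: look up (or create) the vreg for the token supplied by a
// call's "convergencectrl" operand bundle. CB is the IR call site.
Register Token; // stays invalid when no bundle is present
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
  const Value &TokenValue = *Bundle->Inputs[0].get();
  Token = getOrCreateConvergenceTokenVReg(TokenValue);
}
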
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
@@ -91,6 +92,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs,
Register SwiftErrorVReg,
Register ConvergenceCtrlToken,
std::function<unsigned()> GetCalleeReg) const {
CallLoweringInfo Info;
const DataLayout &DL = MIRBuilder.getDataLayout();
@@ -121,7 +123,6 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
CanBeTailCalled = false;
}


// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
@@ -187,6 +188,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
Info.IsMustTailCall = CB.isMustTailCall();
Info.IsTailCall = CanBeTailCalled;
Info.IsVarArg = IsVarArg;
60 changes: 54 additions & 6 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -213,8 +213,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
auto *VRegs = VMap.getVRegs(Val);
auto *Offsets = VMap.getOffsets(Val);

assert(Val.getType()->isSized() &&
"Don't know how to create an empty vreg");
if (!Val.getType()->isTokenTy())
assert(Val.getType()->isSized() &&
"Don't know how to create an empty vreg");

SmallVector<LLT, 4> SplitTys;
computeValueLLTs(*DL, *Val.getType(), SplitTys,
@@ -2038,6 +2039,36 @@ bool IRTranslator::translateIfEntryValueArgument(bool isDeclare, Value *Val,
return true;
}

static unsigned getConvOpcode(Intrinsic::ID ID) {
switch (ID) {
default:
llvm_unreachable("Unexpected intrinsic");
case Intrinsic::experimental_convergence_anchor:
return TargetOpcode::CONVERGENCECTRL_ANCHOR;
case Intrinsic::experimental_convergence_entry:
return TargetOpcode::CONVERGENCECTRL_ENTRY;
case Intrinsic::experimental_convergence_loop:
return TargetOpcode::CONVERGENCECTRL_LOOP;
}
}

bool IRTranslator::translateConvergenceControlIntrinsic(
const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
MachineInstrBuilder MIB = MIRBuilder.buildInstr(getConvOpcode(ID));
Register OutputReg = getOrCreateConvergenceTokenVReg(CI);
MIB.addDef(OutputReg);

if (ID == Intrinsic::experimental_convergence_loop) {
auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl);
assert(Bundle && "Expected a convergence control token.");
Register InputReg =
getOrCreateConvergenceTokenVReg(*Bundle->Inputs[0].get());
MIB.addUse(InputReg);
}

return true;
}

bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
@@ -2479,7 +2510,10 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
#include "llvm/IR/ConstrainedOps.def"
return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
MIRBuilder);

case Intrinsic::experimental_convergence_anchor:
case Intrinsic::experimental_convergence_entry:
case Intrinsic::experimental_convergence_loop:
return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder);
}
return false;
}
@@ -2530,12 +2564,18 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
}
}

Register ConvergenceCtrlToken = 0;
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
const auto &Token = *Bundle->Inputs[0].get();
ConvergenceCtrlToken = getOrCreateConvergenceTokenVReg(Token);
}

// We don't set HasCalls on MFI here yet because call lowering may decide to
// optimize into tail calls. Instead, we defer that to selection where a final
// scan is done to check if any instructions are calls.
bool Success =
CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
[&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
bool Success = CLI->lowerCall(
MIRBuilder, CB, Res, Args, SwiftErrorVReg, ConvergenceCtrlToken,
[&]() { return getOrCreateVReg(*CB.getCalledOperand()); });

// Check if we just inserted a tail call.
if (Success) {
@@ -2649,6 +2689,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
}

if (CI.isConvergent()) {
if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
Register TokenReg = getOrCreateVReg(*Token);
MIB.addUse(TokenReg, RegState::Implicit);
}
}

return true;
}

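
Taken together: each entry/anchor/loop intrinsic becomes a CONVERGENCECTRL_* generic instruction defining a token-typed vreg, and every controlled convergent call picks that vreg up as an implicit use. For the basic_call test below, the translated MIR is roughly (register names and the s0 spelling of the token type are illustrative; ellipses elide operands):

%tok:_(s0) = CONVERGENCECTRL_ENTRY
G_SI_CALL ..., @foo, csr_amdgpu, ..., implicit %tok

The implicit-use encoding keeps the token out of the call's normal argument list while still recording the dependence for later passes.
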
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -538,6 +538,14 @@ bool InlineAsmLowering::lowerInlineAsm(
}
}

if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
assert(SourceRegs.size() == 1 &&
"Expected the control token to fit into a single virtual register");
Inst.addUse(SourceRegs[0], RegState::Implicit);
}

if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
Inst.addMetadata(SrcLoc);

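
The same bundle lookup also covers convergent inline asm. A hypothetical input (the asm string and the "=v,v" constraints are invented for illustration):

define i32 @asm_example(i32 %x) convergent {
  %t = call token @llvm.experimental.convergence.anchor()
  %r = call i32 asm sideeffect "; consumes $1", "=v,v"(i32 %x) [ "convergencectrl"(token %t) ]
  ret i32 %r
}

As in the call case, the token's single vreg is attached to the INLINEASM instruction as an implicit use.
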
13 changes: 8 additions & 5 deletions llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1919,10 +1919,13 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {

if (Token.range().front() == 's') {
auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
if (!verifyScalarSize(ScalarSize))
return error("invalid size for scalar type");

Ty = LLT::scalar(ScalarSize);
if (ScalarSize) {
if (!verifyScalarSize(ScalarSize))
return error("invalid size for scalar type");
Ty = LLT::scalar(ScalarSize);
} else {
Ty = LLT::token();
}
lex();
return false;
} else if (Token.range().front() == 'p') {
@@ -1980,7 +1983,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.range().front() == 's') {
auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
if (!verifyScalarSize(ScalarSize))
return error("invalid size for scalar type");
return error("invalid size for scalar element in vector");
Ty = LLT::scalar(ScalarSize);
} else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
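
With this change an s0 scalar in MIR parses as LLT::token() instead of being rejected, while a zero-sized vector element stays an error and gets a more precise message. A sketch of both behaviors (syntax is illustrative; CONVERGENCECTRL_ANCHOR is the opcode used in the hunks above):

%0:_(s0) = CONVERGENCECTRL_ANCHOR ; accepted: s0 now denotes LLT::token()
%1:_(<2 x s0>) = G_IMPLICIT_DEF ; rejected: invalid size for scalar element in vector

Since the printer spells the token type as s0, token-typed vregs can now round-trip through MIR.
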
4 changes: 2 additions & 2 deletions llvm/lib/IR/ConvergenceVerifier.cpp
@@ -75,14 +75,14 @@ GenericConvergenceVerifier<SSAContext>::findAndCheckConvergenceTokenUsed(

template <>
bool GenericConvergenceVerifier<SSAContext>::isInsideConvergentFunction(
const InstructionT &I) {
const Instruction &I) {
auto *F = I.getFunction();
return F->isConvergent();
}

template <>
bool GenericConvergenceVerifier<SSAContext>::isConvergent(
const InstructionT &I) {
const Instruction &I) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
return CB->isConvergent();
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1301,6 +1301,9 @@ bool AMDGPUCallLowering::lowerTailCall(
if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
return false;

if (Info.ConvergenceCtrlToken) {
MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
}
handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC,
ImplicitArgRegs);

@@ -1483,6 +1486,9 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,

const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

if (Info.ConvergenceCtrlToken) {
MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
}
handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv,
ImplicitArgRegs);

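
On the AMDGPU side the token is just one more implicit operand, added before the usual implicit call arguments in both the regular and tail-call lowering paths. The selected tail call in the test below ends up shaped like (modulo register numbering):

SI_TCRETURN ..., @external_void_func_void, 0, csr_amdgpu, implicit %tok
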
59 changes: 48 additions & 11 deletions llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
@@ -1,21 +1,24 @@
; RUN: llc --amdgpu-disable-structurizer -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
; RUN: llc --amdgpu-disable-structurizer -stop-after=dead-mi-elimination -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,DEADMI %s
; RUN: llc --amdgpu-disable-structurizer -global-isel -stop-after=irtranslator -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL

; CHECK-LABEL: name: basic_call
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ENTRY
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, [[TOKEN]], csr_amdgpu, {{.*}}
; DEADMI: {{.*}} SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} G_SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
define i32 @basic_call(i32 %src) #0 {
%t = call token @llvm.experimental.convergence.entry()
%r = call i32 @foo(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
}

; CHECK-LABEL: name: basic_intrinsic
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[TOKEN]]
define i32 @basic_intrinsic(i32 %src) #0 {
%t = call token @llvm.experimental.convergence.anchor()
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
@@ -30,12 +33,13 @@ define i32 @uncontrolled_call(i32 %src) #0 {
}

; CHECK-LABEL: name: basic_branch
; CHECK: bb.0.entry:
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
; CHECK: bb.1.then:
; CHECK: bb.[[#]].entry:
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; CHECK: bb.[[#]].then:
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[TOKEN]]
define i32 @basic_branch(i32 %src, i1 %cond) #0 {
entry:
%t = call token @llvm.experimental.convergence.anchor()
@@ -52,12 +56,13 @@
else:
}

; CHECK-LABEL: name: basic_loop
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
; CHECK: bb.1.loop:
; CHECK: [[LOOP:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_LOOP [[TOKEN]]
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; CHECK: bb.[[#]].loop:
; CHECK: [[LOOP:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_LOOP [[TOKEN]]
; ISEL: CONVERGENCECTRL_GLUE [[LOOP]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[LOOP]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[LOOP]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[LOOP]]
define i32 @basic_loop(i32 %src, i1 %cond) #0 {
%t1 = call token @llvm.experimental.convergence.anchor()
br label %loop
@@ -71,6 +76,38 @@
end:
ret i32 %r
}

; CHECK-LABEL: name: nested
; CHECK: [[ENTRY:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; CHECK: [[ANCHOR:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[ANCHOR]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[ANCHOR]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[ENTRY]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[ENTRY]]
define i32 @nested(i32 %src) #0 {
%t1 = call token @llvm.experimental.convergence.entry()
%t2 = call token @llvm.experimental.convergence.anchor()
%r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
%r1 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t1) ]
%sum = add i32 %r1, %r2
ret i32 %sum
}

; COM: FIXME: Tokens on tail-call have not been implemented for SelectionDAG
; COM: yet; the corresponding checks have been commented out.
;
; CHECK-LABEL: name: tail_call_void_func_void
; GISEL: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; COM: CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; COM: ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @external_void_func_void, [[TOKEN]], csr_amdgpu, {{.*}}
; COM: DEADMI: {{.*}} SI_CALL {{.*}}, @external_void_func_void, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} SI_TCRETURN {{.*}}, @external_void_func_void, 0, csr_amdgpu, implicit [[TOKEN]]
define void @tail_call_void_func_void() #0 {
%t1 = call token @llvm.experimental.convergence.entry()
tail call void @external_void_func_void() [ "convergencectrl"(token %t1) ]
ret void
}

declare hidden void @external_void_func_void() #0
declare i32 @foo(i32 %x) #0

declare i32 @llvm.amdgcn.readfirstlane(i32) #0
10 changes: 0 additions & 10 deletions llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid4.mir

This file was deleted.

@@ -5,6 +5,6 @@ name: test_vector_element_size_0
body: |
bb.0:
liveins: $x0
; CHECK: [[@LINE+1]]:15: invalid size for scalar type
; CHECK: [[@LINE+1]]:15: invalid size for scalar element in vector
%0:_(<2 x s0>) = G_IMPLICIT_DEF
...