[AMDGPU] Use mangling-agnostic form of IRBuilder::CreateIntrinsic. NFC. #87638
Conversation
Use the form of CreateIntrinsic that takes an explicit return type and works out the mangling based on that and the types of the arguments. The advantage is that this still works if intrinsics are changed to have type mangling, e.g. if readlane/readfirstlane/writelane are changed to work on any type. I changed all occurrences of CreateIntrinsic in the AMDGPU backend for consistency. I think it works out neater in most (but perhaps not all) cases.
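For context, a minimal sketch (not part of the patch) contrasting the two overloads on the readlane call from buildScan. It assumes an IRBuilder<> B and an i32 Value *V that have already been set up:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;

static Value *readLane31(IRBuilder<> &B, Value *V) {
  // Old form: the mangling types are listed explicitly (empty here, since
  // llvm.amdgcn.readlane is not type-mangled yet):
  //   B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, B.getInt32(31)});

  // New form: the return type is passed up front and any mangling suffix is
  // derived from it and the argument types, so the call site keeps working
  // if the intrinsic later becomes type-mangled.
  return B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
                           {V, B.getInt32(31)});
}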
@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Patch is 21.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87638.diff

6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index dbb3de76b4ddae..8e9b3a504f5875 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -413,7 +413,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
assert(ST->hasPermLaneX16());
V = B.CreateBitCast(V, IntNTy);
Value *Permlanex16Call = B.CreateIntrinsic(
- Intrinsic::amdgcn_permlanex16, {},
+ V->getType(), Intrinsic::amdgcn_permlanex16,
{V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()});
V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
B.CreateBitCast(Permlanex16Call, AtomicTy));
@@ -425,7 +425,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
// Reduce across the upper and lower 32 lanes.
V = B.CreateBitCast(V, IntNTy);
Value *Permlane64Call =
- B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V);
+ B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_permlane64, V);
return buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
B.CreateBitCast(Permlane64Call, AtomicTy));
}
@@ -481,7 +481,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
assert(ST->hasPermLaneX16());
V = B.CreateBitCast(V, IntNTy);
Value *PermX = B.CreateIntrinsic(
- Intrinsic::amdgcn_permlanex16, {},
+ V->getType(), Intrinsic::amdgcn_permlanex16,
{V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()});
Value *UpdateDPPCall =
@@ -493,8 +493,8 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
if (!ST->isWave32()) {
// Combine lane 31 into lanes 32..63.
V = B.CreateBitCast(V, IntNTy);
- Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
- {V, B.getInt32(31)});
+ Value *const Lane31 = B.CreateIntrinsic(
+ V->getType(), Intrinsic::amdgcn_readlane, {V, B.getInt32(31)});
Value *UpdateDPPCall = B.CreateCall(
UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
@@ -574,7 +574,7 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
auto NeedResult = !I.use_empty();
auto *Ballot =
- B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
+ B.CreateIntrinsic(WaveTy, Intrinsic::amdgcn_ballot, B.getTrue());
// Start inserting instructions for ComputeLoop block
B.SetInsertPoint(ComputeLoop);
@@ -591,15 +591,15 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
// Use llvm.cttz instrinsic to find the lowest remaining active lane.
auto *FF1 =
- B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits, B.getTrue()});
+ B.CreateIntrinsic(WaveTy, Intrinsic::cttz, {ActiveBits, B.getTrue()});
Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits());
auto *LaneIdxInt = B.CreateTrunc(FF1, IntNTy);
// Get the value required for atomic operation
V = B.CreateBitCast(V, IntNTy);
- Value *LaneValue =
- B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
+ Value *LaneValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
+ {V, LaneIdxInt});
LaneValue = B.CreateBitCast(LaneValue, Ty);
// Perform writelane if intermediate scan results are required later in the
@@ -607,7 +607,7 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
Value *OldValue = nullptr;
if (NeedResult) {
OldValue =
- B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
+ B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_writelane,
{B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
B.CreateBitCast(OldValuePhi, IntNTy)});
OldValue = B.CreateBitCast(OldValue, Ty);
@@ -696,7 +696,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// Record I's original position as the entry block.
PixelEntryBB = I.getParent();
- Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
+ Value *const Cond =
+ B.CreateIntrinsic(B.getInt1Ty(), Intrinsic::amdgcn_ps_live, {});
Instruction *const NonHelperTerminator =
SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, &DTU, nullptr);
@@ -722,7 +723,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// this by doing a ballot of active lanes.
Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize());
CallInst *const Ballot =
- B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
+ B.CreateIntrinsic(WaveTy, Intrinsic::amdgcn_ballot, B.getTrue());
// We need to know how many lanes are active within the wavefront that are
// below us. If we counted each lane linearly starting from 0, a lane is
@@ -730,15 +731,15 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// using the mbcnt intrinsic.
Value *Mbcnt;
if (ST->isWave32()) {
- Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
+ Mbcnt = B.CreateIntrinsic(WaveTy, Intrinsic::amdgcn_mbcnt_lo,
{Ballot, B.getInt32(0)});
} else {
Value *const ExtractLo = B.CreateTrunc(Ballot, Int32Ty);
Value *const ExtractHi = B.CreateTrunc(B.CreateLShr(Ballot, 32), Int32Ty);
- Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
+ Mbcnt = B.CreateIntrinsic(Int32Ty, Intrinsic::amdgcn_mbcnt_lo,
{ExtractLo, B.getInt32(0)});
- Mbcnt =
- B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt});
+ Mbcnt = B.CreateIntrinsic(Int32Ty, Intrinsic::amdgcn_mbcnt_hi,
+ {ExtractHi, Mbcnt});
}
Function *F = I.getFunction();
@@ -769,7 +770,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// that they can correctly contribute to the final result.
V = B.CreateBitCast(V, IntNTy);
Identity = B.CreateBitCast(Identity, IntNTy);
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, IntNTy,
+ NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_set_inactive,
{V, Identity});
NewV = B.CreateBitCast(NewV, Ty);
V = B.CreateBitCast(V, Ty);
@@ -789,12 +790,12 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
assert(TyBitWidth == 32);
NewV = B.CreateBitCast(NewV, IntNTy);
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+ NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readlane,
{NewV, LastLaneIdx});
NewV = B.CreateBitCast(NewV, Ty);
}
// Finally mark the readlanes in the WWM section.
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
+ NewV = B.CreateIntrinsic(Ty, Intrinsic::amdgcn_strict_wwm, NewV);
} else if (ScanImpl == ScanOptions::Iterative) {
// Alternative implementation for scan
ComputeLoop = BasicBlock::Create(C, "ComputeLoop", F);
@@ -925,10 +926,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty);
Value *const ExtractHi =
B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty);
- CallInst *const ReadFirstLaneLo =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
- CallInst *const ReadFirstLaneHi =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+ CallInst *const ReadFirstLaneLo = B.CreateIntrinsic(
+ Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo);
+ CallInst *const ReadFirstLaneHi = B.CreateIntrinsic(
+ Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi);
Value *const PartialInsert = B.CreateInsertElement(
PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
Value *const Insert =
@@ -937,7 +938,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
} else if (TyBitWidth == 32) {
Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
BroadcastI =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
+ B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi);
BroadcastI = B.CreateBitCast(BroadcastI, Ty);
} else {
@@ -952,7 +953,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
if (ValDivergent) {
if (ScanImpl == ScanOptions::DPP) {
LaneOffset =
- B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, ExclScan);
+ B.CreateIntrinsic(Ty, Intrinsic::amdgcn_strict_wwm, ExclScan);
} else if (ScanImpl == ScanOptions::Iterative) {
LaneOffset = ExclScan;
} else {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 6e7d34f5adaa3f..4dd1f65ffa28c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -675,7 +675,7 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
: Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
Intrinsic::ID ID =
IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
- Value *Result = Builder.CreateIntrinsic(ID, {IntrinTy}, {LHS, RHS});
+ Value *Result = Builder.CreateIntrinsic(IntrinTy, ID, {LHS, RHS});
Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
: Builder.CreateZExtOrTrunc(Result, DstTy);
ResultVals.push_back(Result);
@@ -769,8 +769,8 @@ std::pair<Value *, Value *>
AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
Value *Src) const {
Type *Ty = Src->getType();
- Value *Frexp = Builder.CreateIntrinsic(Intrinsic::frexp,
- {Ty, Builder.getInt32Ty()}, Src);
+ Type *FrexpTy = StructType::get(Ty, Builder.getInt32Ty());
+ Value *Frexp = Builder.CreateIntrinsic(FrexpTy, Intrinsic::frexp, Src);
Value *FrexpMant = Builder.CreateExtractValue(Frexp, {0});
// Bypass the bug workaround for the exponent result since it doesn't matter.
@@ -779,8 +779,8 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
Value *FrexpExp =
ST->hasFractBug()
- ? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
- {Builder.getInt32Ty(), Ty}, Src)
+ ? Builder.CreateIntrinsic(Builder.getInt32Ty(),
+ Intrinsic::amdgcn_frexp_exp, Src)
: Builder.CreateExtractValue(Frexp, {1});
return {FrexpMant, FrexpExp};
}
@@ -1027,7 +1027,8 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
if (!HasFP32DenormalFlush && !NumIsOne)
return nullptr;
- return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {}, {Num, Den});
+ return Builder.CreateIntrinsic(Den->getType(), Intrinsic::amdgcn_fdiv_fast,
+ {Num, Den});
}
Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
@@ -1276,8 +1277,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
auto FMAD = !ST->hasMadMacF32Insts()
? Intrinsic::fma
: (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
- Value *FR = Builder.CreateIntrinsic(FMAD,
- {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);
+ Value *FR = Builder.CreateIntrinsic(F32Ty, FMAD, {FQNeg, FB, FA}, FQ);
// int iq = (int)fq;
Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
@@ -2159,7 +2159,7 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
Type *Ty = FractArg->getType()->getScalarType();
for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {
ResultVals[I] =
- Builder.CreateIntrinsic(Intrinsic::amdgcn_fract, {Ty}, {FractVals[I]});
+ Builder.CreateIntrinsic(Ty, Intrinsic::amdgcn_fract, {FractVals[I]});
}
return insertValues(Builder, FractArg->getType(), ResultVals);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5b7fa13f2e835f..b12bc85dbc0ba8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -830,8 +830,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) &&
matchFPExtFromF16(Src2, Z)) {
- Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()},
- {X, Y, Z}, &II, II.getName());
+ Value *NewCall = IC.Builder.CreateIntrinsic(X->getType(), IID, {X, Y, Z},
+ &II, II.getName());
return new FPExtInst(NewCall, II.getType());
}
@@ -994,8 +994,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// %b32 = call i32 ballot.i32(...)
// %b64 = zext i32 %b32 to i64
Value *Call = IC.Builder.CreateZExt(
- IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
- {IC.Builder.getInt32Ty()},
+ IC.Builder.CreateIntrinsic(IC.Builder.getInt32Ty(),
+ Intrinsic::amdgcn_ballot,
{II.getArgOperand(0)}),
II.getType());
Call->takeName(&II);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 9083150b338488..ac65a53325f468 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -877,7 +877,7 @@ class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
void setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx);
void insertPreMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
void insertPostMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
- Value *handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, Type *Ty,
+ Value *handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
Align Alignment, AtomicOrdering Order,
bool IsVolatile, SyncScope::ID SSID);
@@ -1199,9 +1199,8 @@ void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order,
}
Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
- Type *Ty, Align Alignment,
- AtomicOrdering Order, bool IsVolatile,
- SyncScope::ID SSID) {
+ Align Alignment, AtomicOrdering Order,
+ bool IsVolatile, SyncScope::ID SSID) {
IRB.SetInsertPoint(I);
auto [Rsrc, Off] = getPtrParts(Ptr);
@@ -1299,7 +1298,7 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
}
}
- auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);
+ auto *Call = IRB.CreateIntrinsic(I->getType(), IID, Args);
copyMetadata(Call, I);
setAlign(Call, Alignment, Arg ? 1 : 0);
Call->takeName(I);
@@ -1319,9 +1318,8 @@ PtrParts SplitPtrStructs::visitInstruction(Instruction &I) {
PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
if (!isSplitFatPtr(LI.getPointerOperandType()))
return {nullptr, nullptr};
- handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getType(),
- LI.getAlign(), LI.getOrdering(), LI.isVolatile(),
- LI.getSyncScopeID());
+ handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getAlign(),
+ LI.getOrdering(), LI.isVolatile(), LI.getSyncScopeID());
return {nullptr, nullptr};
}
@@ -1329,9 +1327,8 @@ PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
if (!isSplitFatPtr(SI.getPointerOperandType()))
return {nullptr, nullptr};
Value *Arg = SI.getValueOperand();
- handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(),
- SI.getAlign(), SI.getOrdering(), SI.isVolatile(),
- SI.getSyncScopeID());
+ handleMemoryInst(&SI, Arg, SI.getPointerOperand(), SI.getAlign(),
+ SI.getOrdering(), SI.isVolatile(), SI.getSyncScopeID());
return {nullptr, nullptr};
}
@@ -1339,9 +1336,8 @@ PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
if (!isSplitFatPtr(AI.getPointerOperand()->getType()))
return {nullptr, nullptr};
Value *Arg = AI.getValOperand();
- handleMemoryInst(&AI, Arg, AI.getPointerOperand(), Arg->getType(),
- AI.getAlign(), AI.getOrdering(), AI.isVolatile(),
- AI.getSyncScopeID());
+ handleMemoryInst(&AI, Arg, AI.getPointerOperand(), AI.getAlign(),
+ AI.getOrdering(), AI.isVolatile(), AI.getSyncScopeID());
return {nullptr, nullptr};
}
@@ -1367,7 +1363,7 @@ PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
if (AI.isVolatile())
Aux |= AMDGPU::CPol::VOLATILE;
auto *Call =
- IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
+ IRB.CreateIntrinsic(Ty, Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap,
{AI.getNewValOperand(), AI.getCompareOperand(), Rsrc,
Off, IRB.getInt32(0), IRB.getInt32(Aux)});
copyMetadata(Call, &AI);
@@ -1727,8 +1723,8 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
return {nullptr, nullptr};
IRB.SetInsertPoint(&I);
auto [Rsrc, Off] = getPtrParts(Ptr);
- Type *NewTy = PointerType::get(I.getContext(), AMDGPUAS::BUFFER_RESOURCE);
- auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});
+ auto *NewRsrc =
+ IRB.CreateIntrinsic(I.getType(), IID, {I.getOperand(0), Rsrc});
copyMetadata(NewRsrc, &I);
NewRsrc->takeName(&I);
SplitUsers.insert(&I);
@@ -1743,8 +1739,8 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
Value *RealRsrc = getPtrParts(RealPtr).first;
Value *InvPtr = I.getArgOperand(0);
Value *Size = I.getArgOperand(1);
- Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},
- {InvPtr, Size, RealRsrc});
+ Value *NewRsrc =
+ IRB.CreateIntrinsic(IRB.getVoidTy(), IID, {InvPtr, Size, RealRsrc});
copyMetadata(NewRsrc, &I);
NewRsrc->takeName(&I);
SplitUsers.insert(&I);
@@ -1758,7 +1754,7 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
return {nullptr, nullptr};
IRB.SetInsertPoint(&I);
auto [Rsrc, Off] = getPtrParts(Ptr);
- Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});
+ Value *NewRsrc = IRB.CreateIntrinsic(Rsrc->getType(), IID, {Rsrc});
copyMetadata(NewRsrc, &I);
NewRsrc->takeName(&I);
SplitUsers.insert(&I);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 31077dbc0b2cc4..0a5344c9a40b33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1098,7 +1098,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
MaskOp = B.CreateTrunc(MaskOp, MaskTy);
}
- return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
+ return B.CreateIntrinsic(NewV->getType(), Intrinsic::ptrmask,
{NewV, MaskOp});
}
case Intrinsic::amdgcn_flat_atomic_fadd:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 14948ef9ea8d17..28d4cf9312515e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16401,8 +16401,9 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicR...
[truncated]
Maybe just change the ones for the intrinsics that will change?
@@ -574,7 +574,7 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
   auto NeedResult = !I.use_empty();

   auto *Ballot =
-      B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
+      B.CreateIntrinsic(WaveTy, Intrinsic::amdgcn_ballot, B.getTrue());
No real plus here
@@ -591,23 +591,23 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(

   // Use llvm.cttz instrinsic to find the lowest remaining active lane.
   auto *FF1 =
-      B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits, B.getTrue()});
+      B.CreateIntrinsic(WaveTy, Intrinsic::cttz, {ActiveBits, B.getTrue()});
No plus here
AtomicOptimizer changes LGTM.
If I was doing that, I would just do it as part of changing the intrinsic. The only point of this patch was the "for consistency" argument.
Having been pulled in for tangential review, the changes to the fat pointer code seem fine, but I'm not in a position to give thoughts on the PR as a whole.
I don't think this is much of a win. It does add extra work in the underlying implementation.
Fair enough.
OK, I did that for readlane/readfirstlane/writelane in #91583.