-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Handle lowering addrspace casts from LDS to FLAT address in amdgpu-sw-lower-lds. #121214
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -192,8 +192,7 @@ class AMDGPUSwLowerLDS { | |
void getLDSMemoryInstructions(Function *Func, | ||
SetVector<Instruction *> &LDSInstructions); | ||
void replaceKernelLDSAccesses(Function *Func); | ||
Value *getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr, | ||
Value *LDSPtr); | ||
Value *getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr, Value *LDSPtr); | ||
void translateLDSMemoryOperationsToGlobalMemory( | ||
Function *Func, Value *LoadMallocPtr, | ||
SetVector<Instruction *> &LDSInstructions); | ||
|
@@ -655,20 +654,37 @@ void AMDGPUSwLowerLDS::getLDSMemoryInstructions( | |
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(&Inst)) { | ||
if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) | ||
LDSInstructions.insert(&Inst); | ||
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&Inst)) { | ||
if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && | ||
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) | ||
LDSInstructions.insert(&Inst); | ||
} else | ||
continue; | ||
} | ||
} | ||
} | ||
|
||
Value * | ||
AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr, | ||
Value *AMDGPUSwLowerLDS::getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr, | ||
Value *LDSPtr) { | ||
assert(LDSPtr && "Invalid LDS pointer operand"); | ||
Type *LDSPtrType = LDSPtr->getType(); | ||
|
||
if (LDSPtrType->isVectorTy()) { | ||
// Handle vector of pointers | ||
VectorType *VecPtrTy = cast<VectorType>(LDSPtrType); | ||
ElementCount NumElements = VecPtrTy->getElementCount(); | ||
Type *Int32VecTy = VectorType::get(IRB.getInt32Ty(), NumElements); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can use `DL.getIntPtrType` for this instead of hardcoding the address space size. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed it. |
||
Value *PtrToInt = IRB.CreatePtrToInt(LDSPtr, Int32VecTy); | ||
// Create vector of pointers to global address space | ||
Type *GlobalPtrVecTy = | ||
VectorType::get(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), NumElements); | ||
Value *GlobalPtrVec = | ||
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, PtrToInt); | ||
GlobalPtrVec = IRB.CreateBitCast(GlobalPtrVec, GlobalPtrVecTy); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This bitcast is unnecessary since the change to opaque pointers. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
return GlobalPtrVec; | ||
} | ||
Value *PtrToInt = IRB.CreatePtrToInt(LDSPtr, IRB.getInt32Ty()); | ||
Value *GEP = | ||
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt}); | ||
return GEP; | ||
return IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt}); | ||
} | ||
|
||
void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( | ||
|
@@ -681,7 +697,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( | |
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { | ||
Value *LIOperand = LI->getPointerOperand(); | ||
Value *Replacement = | ||
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, LIOperand); | ||
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, LIOperand); | ||
LoadInst *NewLI = IRB.CreateAlignedLoad(LI->getType(), Replacement, | ||
LI->getAlign(), LI->isVolatile()); | ||
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); | ||
|
@@ -691,7 +707,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( | |
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { | ||
Value *SIOperand = SI->getPointerOperand(); | ||
Value *Replacement = | ||
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, SIOperand); | ||
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, SIOperand); | ||
StoreInst *NewSI = IRB.CreateAlignedStore( | ||
SI->getValueOperand(), Replacement, SI->getAlign(), SI->isVolatile()); | ||
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); | ||
|
@@ -701,8 +717,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( | |
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) { | ||
Value *RMWPtrOperand = RMW->getPointerOperand(); | ||
Value *RMWValOperand = RMW->getValOperand(); | ||
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer( | ||
LoadMallocPtr, RMWPtrOperand); | ||
Value *Replacement = | ||
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, RMWPtrOperand); | ||
AtomicRMWInst *NewRMW = IRB.CreateAtomicRMW( | ||
RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(), | ||
RMW->getOrdering(), RMW->getSyncScopeID()); | ||
|
@@ -712,8 +728,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( | |
RMW->eraseFromParent(); | ||
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(Inst)) { | ||
Value *XCHGPtrOperand = XCHG->getPointerOperand(); | ||
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer( | ||
LoadMallocPtr, XCHGPtrOperand); | ||
Value *Replacement = | ||
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, XCHGPtrOperand); | ||
AtomicCmpXchgInst *NewXCHG = IRB.CreateAtomicCmpXchg( | ||
Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(), | ||
XCHG->getAlign(), XCHG->getSuccessOrdering(), | ||
|
@@ -722,6 +738,16 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( | |
AsanInfo.Instructions.insert(NewXCHG); | ||
XCHG->replaceAllUsesWith(NewXCHG); | ||
XCHG->eraseFromParent(); | ||
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Inst)) { | ||
Value *AIOperand = ASC->getPointerOperand(); | ||
Value *Replacement = | ||
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, AIOperand); | ||
Value *NewAI = IRB.CreateAddrSpaceCast(Replacement, ASC->getType()); | ||
// Note: No need to add the instruction to AsanInfo instructions to be | ||
// instrumented list. FLAT_ADDRESS ptr would have been already | ||
// instrumented by asan pass prior to this pass. | ||
ASC->replaceAllUsesWith(NewAI); | ||
ASC->eraseFromParent(); | ||
} else | ||
report_fatal_error("Unimplemented LDS lowering instruction"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is missing quite a lot of cases. With addrspacecast unhandled, this was previously a fatal error? I don't see a fatal error in your testcase now. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The latest commit now handles addrspace cast with vector of ptrs. Please let me know if I missed any other cases here. |
||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `dyn_cast` instead of `isVectorTy` + `cast`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in the latest commit. Thanks.