Skip to content

Commit 34f1385

Browse files
krystian-andrzejewski authored and igcbot committed
Support write-sync-ret cases for SLM in SynchronizationObjectCoalescing
This change supports cases where a fence that synchronizes SLM writes is followed by an end-of-thread operation. Such fences cannot be removed.
1 parent 46b574b commit 34f1385

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

IGC/Compiler/Optimizer/SynchronizationObjectCoalescing.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ enum InstructionMask : uint32_t
6464
BufferWriteOperation = (1 << 6),
6565
SharedMemoryReadOperation = (1 << 7),
6666
SharedMemoryWriteOperation = (1 << 8),
67+
EndOfThreadOperation = (1 << 9),
6768
};
6869
constexpr InstructionMask AllNoAtomicMask =
6970
InstructionMask{ ((1 << 9) - 1) & ~InstructionMask::AtomicOperation };
@@ -122,6 +123,7 @@ inline constexpr InstructionMask& operator|=(InstructionMask& a, InstructionMask
122123
/// - any write instruction, this synchronization instruction, any atomic instruction (RAW or WAW),
123124
/// - any atomic instruction, this synchronization instruction, any read instruction (RAW),
124125
/// - any atomic instruction, this synchronization instruction, any write instruction (WAR or WAW),
126+
/// - any write instruction, this synchronization instruction, any return instruction (WAE),
125127
/// - (only for barriers or dependent fences****) any read instruction, this synchronization instruction,
126128
/// any write instruction (WAR),
127129
/// - any read instruction, this synchronization instruction, any atomic instruction (RAW),
@@ -253,6 +255,7 @@ class SynchronizationObjectCoalescingAnalysis : public llvm::FunctionPass
253255
ReadSyncAtomic = 0x20,
254256
WriteSyncRead = 0x40,
255257
AtomicSyncAtomic = 0x80,
258+
WriteSyncRet = 0x100
256259
};
257260

258261
static constexpr SynchronizationCaseMask sc_FullSynchronizationCaseMask = static_cast<SynchronizationCaseMask>(
@@ -390,6 +393,9 @@ class SynchronizationObjectCoalescingAnalysis : public llvm::FunctionPass
390393
////////////////////////////////////////////////////////////////////////
391394
static bool IsSharedMemoryWriteOperation(const llvm::Instruction* pInst);
392395

396+
////////////////////////////////////////////////////////////////////////
397+
static bool IsReturnOperation(const llvm::Instruction* pInst);
398+
393399
////////////////////////////////////////////////////////////////////////
394400
static bool IsThreadBarrierOperation(const llvm::Instruction* pInst);
395401

@@ -912,9 +918,24 @@ InstructionMask SynchronizationObjectCoalescingAnalysis::GetDefaultMemoryInstruc
912918
{
913919
IGC_ASSERT(0);
914920
}
921+
922+
if (static_cast<uint32_t>(result & SharedMemoryWriteOperation) != 0)
923+
{
924+
925+
result = static_cast<InstructionMask>(
926+
result |
927+
EndOfThreadOperation);
928+
}
929+
915930
return result;
916931
}
917932

933+
////////////////////////////////////////////////////////////////////////
934+
bool SynchronizationObjectCoalescingAnalysis::IsReturnOperation(const llvm::Instruction* pInst)
935+
{
936+
return llvm::isa<llvm::ReturnInst>(pInst);
937+
}
938+
918939
////////////////////////////////////////////////////////////////////////
919940
/// @brief Go through a basic block according to the iterator direction
920941
/// until any boundary instruction is met. Meanwhile, this function collects
@@ -1263,6 +1284,14 @@ SynchronizationObjectCoalescingAnalysis::SynchronizationCaseMask Synchronization
12631284
result = static_cast<SynchronizationCaseMask>(result | SynchronizationCaseMask::WriteSyncWrite);
12641285
}
12651286

1287+
// write -> fence -> ret
1288+
bool isWriteSyncRetCase = (writeBit == SharedMemoryWriteOperation) && ((localBackwardMemoryInstructionMask & writeBit) != 0 &&
1289+
(localForwardMemoryInstructionMask & EndOfThreadOperation) != 0);
1290+
if (isWriteSyncRetCase)
1291+
{
1292+
result = static_cast<SynchronizationCaseMask>(result | SynchronizationCaseMask::WriteSyncRet);
1293+
}
1294+
12661295
// atomic -> barrier/fence -> read
12671296
bool isAtomicSyncReadCase = ((localBackwardMemoryInstructionMask & AtomicOperation) != 0 && (localForwardMemoryInstructionMask & readBit) != 0);
12681297
if (isAtomicSyncReadCase)
@@ -1320,6 +1349,8 @@ SynchronizationObjectCoalescingAnalysis::SynchronizationCaseMask Synchronization
13201349
// fences don't provide any guarantees for the order of instruction execution between threads
13211350
strictSynchronizationCaseMask = static_cast<SynchronizationCaseMask>((~SynchronizationCaseMask::AtomicSyncAtomic) & strictSynchronizationCaseMask);
13221351

1352+
strictSynchronizationCaseMask = static_cast<SynchronizationCaseMask>((SynchronizationCaseMask::WriteSyncRet) | strictSynchronizationCaseMask);
1353+
13231354
// Note: Please change the description in igc flags if the value is changed.
13241355
static_assert(SynchronizationCaseMask::ReadSyncWrite == 0x01);
13251356
bool disableReadFenceWriteCase = (IGC_GET_FLAG_VALUE(SynchronizationObjectCoalescingConfig) & SynchronizationCaseMask::ReadSyncWrite) != 0;
@@ -2224,6 +2255,10 @@ InstructionMask SynchronizationObjectCoalescingAnalysis::GetInstructionMask(cons
22242255
return InstructionMask::BufferReadOperation |
22252256
InstructionMask::BufferWriteOperation;
22262257
}
2258+
else if (IsReturnOperation(pInst))
2259+
{
2260+
return InstructionMask::EndOfThreadOperation;
2261+
}
22272262

22282263
return InstructionMask::None;
22292264
}

0 commit comments

Comments
 (0)