@@ -64,6 +64,7 @@ enum InstructionMask : uint32_t
64
64
BufferWriteOperation = (1 << 6 ),
65
65
SharedMemoryReadOperation = (1 << 7 ),
66
66
SharedMemoryWriteOperation = (1 << 8 ),
67
+ EndOfThreadOperation = (1 << 9 ),
67
68
};
68
69
// Mask with every InstructionMask bit below bit 9 set, minus the AtomicOperation bit(s).
// NOTE(review): EndOfThreadOperation is (1 << 9), so it is NOT covered by ((1 << 9) - 1);
// confirm that leaving it out of AllNoAtomicMask is intentional.
constexpr InstructionMask AllNoAtomicMask =
69
70
InstructionMask{ ((1 << 9 ) - 1 ) & ~InstructionMask::AtomicOperation };
@@ -122,6 +123,7 @@ inline constexpr InstructionMask& operator|=(InstructionMask& a, InstructionMask
122
123
// / - any write instruction, this synchronization instruction, any atomic instruction (RAW or WAW),
123
124
// / - any atomic instruction, this synchronization instruction, any read instruction (RAW),
124
125
// / - any atomic instruction, this synchronization instruction, any write instruction (WAR or WAW),
126
+ // / - any write instruction, this synchronization instruction, any return instruction (WAE),
125
127
// / - (only for barriers or dependent fences****) any read instruction, this synchronization instruction,
126
128
// / any write instruction (WAR),
127
129
// / - any read instruction, this synchronization instruction, any atomic instruction (RAW),
@@ -253,6 +255,7 @@ class SynchronizationObjectCoalescingAnalysis : public llvm::FunctionPass
253
255
ReadSyncAtomic = 0x20 ,
254
256
WriteSyncRead = 0x40 ,
255
257
AtomicSyncAtomic = 0x80 ,
258
+ WriteSyncRet = 0x100
256
259
};
257
260
258
261
static constexpr SynchronizationCaseMask sc_FullSynchronizationCaseMask = static_cast <SynchronizationCaseMask>(
@@ -390,6 +393,9 @@ class SynchronizationObjectCoalescingAnalysis : public llvm::FunctionPass
390
393
// //////////////////////////////////////////////////////////////////////
391
394
static bool IsSharedMemoryWriteOperation (const llvm::Instruction* pInst);
392
395
396
+ // //////////////////////////////////////////////////////////////////////
397
+ static bool IsReturnOperation (const llvm::Instruction* pInst);
398
+
393
399
// //////////////////////////////////////////////////////////////////////
394
400
static bool IsThreadBarrierOperation (const llvm::Instruction* pInst);
395
401
@@ -912,9 +918,24 @@ InstructionMask SynchronizationObjectCoalescingAnalysis::GetDefaultMemoryInstruc
912
918
{
913
919
IGC_ASSERT (0 );
914
920
}
921
+
922
+ if (static_cast <uint32_t >(result & SharedMemoryWriteOperation) != 0 )
923
+ {
924
+
925
+ result = static_cast <InstructionMask>(
926
+ result |
927
+ EndOfThreadOperation);
928
+ }
929
+
915
930
return result;
916
931
}
917
932
933
+ // //////////////////////////////////////////////////////////////////////
934
+ bool SynchronizationObjectCoalescingAnalysis::IsReturnOperation (const llvm::Instruction* pInst)
935
+ {
936
+ return llvm::isa<llvm::ReturnInst>(pInst);
937
+ }
938
+
918
939
// //////////////////////////////////////////////////////////////////////
919
940
// / @brief Go through a basic block according to the iterator direction
920
941
// / until any boundary instruction is met. Meanwhile, this function collects
@@ -1263,6 +1284,14 @@ SynchronizationObjectCoalescingAnalysis::SynchronizationCaseMask Synchronization
1263
1284
result = static_cast <SynchronizationCaseMask>(result | SynchronizationCaseMask::WriteSyncWrite);
1264
1285
}
1265
1286
1287
+ // write -> fence -> ret
1288
+ bool isWriteSyncRetCase = (writeBit == SharedMemoryWriteOperation) && ((localBackwardMemoryInstructionMask & writeBit) != 0 &&
1289
+ (localForwardMemoryInstructionMask & EndOfThreadOperation) != 0 );
1290
+ if (isWriteSyncRetCase)
1291
+ {
1292
+ result = static_cast <SynchronizationCaseMask>(result | SynchronizationCaseMask::WriteSyncRet);
1293
+ }
1294
+
1266
1295
// atomic -> barrier/fence -> read
1267
1296
bool isAtomicSyncReadCase = ((localBackwardMemoryInstructionMask & AtomicOperation) != 0 && (localForwardMemoryInstructionMask & readBit) != 0 );
1268
1297
if (isAtomicSyncReadCase)
@@ -1320,6 +1349,8 @@ SynchronizationObjectCoalescingAnalysis::SynchronizationCaseMask Synchronization
1320
1349
// fences don't provide any guarantees for the order of instruction execution between threads
1321
1350
strictSynchronizationCaseMask = static_cast <SynchronizationCaseMask>((~SynchronizationCaseMask::AtomicSyncAtomic) & strictSynchronizationCaseMask);
1322
1351
1352
+ strictSynchronizationCaseMask = static_cast <SynchronizationCaseMask>((SynchronizationCaseMask::WriteSyncRet) | strictSynchronizationCaseMask);
1353
+
1323
1354
// Note: Please change the description in igc flags if the value is changed.
1324
1355
static_assert (SynchronizationCaseMask::ReadSyncWrite == 0x01 );
1325
1356
bool disableReadFenceWriteCase = (IGC_GET_FLAG_VALUE (SynchronizationObjectCoalescingConfig) & SynchronizationCaseMask::ReadSyncWrite) != 0 ;
@@ -2224,6 +2255,10 @@ InstructionMask SynchronizationObjectCoalescingAnalysis::GetInstructionMask(cons
2224
2255
return InstructionMask::BufferReadOperation |
2225
2256
InstructionMask::BufferWriteOperation;
2226
2257
}
2258
+ else if (IsReturnOperation (pInst))
2259
+ {
2260
+ return InstructionMask::EndOfThreadOperation;
2261
+ }
2227
2262
2228
2263
return InstructionMask::None;
2229
2264
}
0 commit comments