@@ -1028,6 +1028,148 @@ void CustomSafeOptPass::visitExtractElementInst(ExtractElementInst &I)
1028
1028
}
1029
1029
}
1030
1030
1031
+ #if LLVM_VERSION_MAJOR >= 7
1032
+ // /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1033
+ // This pass removes dead local memory loads and stores. If we remove all such loads and stores, we also
1034
+ // remove all local memory fences together with barriers that follow.
1035
+ //
1036
// Pass registration boilerplate for TrivialLocalMemoryOpsElimination.
// NOTE(review): the BEGIN/END macro pair presumably defines
// initializeTrivialLocalMemoryOpsEliminationPass(), which the constructor calls;
// the two trailing `false` arguments presumably correspond to the cfg-only /
// is-analysis flags of LLVM's INITIALIZE_PASS macros - confirm against the IGC
// macro definitions.
+ IGC_INITIALIZE_PASS_BEGIN (TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
1037
+ IGC_INITIALIZE_PASS_END(TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
1038
+
1039
// Pass identifier that the constructor hands to the FunctionPass base class.
// NOTE(review): LLVM's legacy pass manager conventionally identifies a pass by
// the address of ID, so the value 0 is presumably only a placeholder - confirm.
+ char TrivialLocalMemoryOpsElimination::ID = 0;
1040
+
1041
+ TrivialLocalMemoryOpsElimination::TrivialLocalMemoryOpsElimination () : FunctionPass(ID)
1042
+ {
1043
+ initializeTrivialLocalMemoryOpsEliminationPass (*PassRegistry::getPassRegistry ());
1044
+ }
1045
+
1046
+ bool TrivialLocalMemoryOpsElimination::runOnFunction (Function &F)
1047
+ {
1048
+ bool change = false ;
1049
+ visit (F);
1050
+ if (!abortPass && (m_LocalLoadsToRemove.empty () || m_LocalStoresToRemove.empty ()))
1051
+ {
1052
+ for (StoreInst *Inst : m_LocalStoresToRemove)
1053
+ {
1054
+ Inst->eraseFromParent ();
1055
+ change = true ;
1056
+ }
1057
+
1058
+ for (LoadInst *Inst : m_LocalLoadsToRemove)
1059
+ {
1060
+ if (Inst->use_empty ())
1061
+ {
1062
+ Inst->eraseFromParent ();
1063
+ change = true ;
1064
+ }
1065
+ }
1066
+
1067
+ for (CallInst *Inst : m_LocalFencesBariersToRemove)
1068
+ {
1069
+ Inst->eraseFromParent ();
1070
+ change = true ;
1071
+ }
1072
+ }
1073
+ m_LocalStoresToRemove.clear ();
1074
+ m_LocalLoadsToRemove.clear ();
1075
+ m_LocalFencesBariersToRemove.clear ();
1076
+
1077
+
1078
+ return change;
1079
+ }
1080
+
1081
+ /*
1082
+ OCL instruction barrier(CLK_LOCAL_MEM_FENCE); is translate to two instructions
1083
+ call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
1084
+ call void @llvm.genx.GenISA.threadgroupbarrier()
1085
+
1086
+ if we remove call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
1087
+ we must remove next instruction if it is call void @llvm.genx.GenISA.threadgroupbarrier()
1088
+ */
1089
+ void TrivialLocalMemoryOpsElimination::findNextThreadGroupBarrierInst (Instruction &I)
1090
+ {
1091
+ auto nextInst = I.getNextNonDebugInstruction ();
1092
+ if (isa<GenIntrinsicInst>(nextInst))
1093
+ {
1094
+ GenIntrinsicInst *II = dyn_cast<GenIntrinsicInst>(nextInst);
1095
+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_threadgroupbarrier)
1096
+ {
1097
+ m_LocalFencesBariersToRemove.push_back (dyn_cast<CallInst>(nextInst));
1098
+ }
1099
+ }
1100
+ }
1101
+
1102
+ void TrivialLocalMemoryOpsElimination::visitLoadInst (LoadInst &I)
1103
+ {
1104
+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
1105
+ {
1106
+ m_LocalLoadsToRemove.push_back (&I);
1107
+ }
1108
+ }
1109
+
1110
+ void TrivialLocalMemoryOpsElimination::visitStoreInst (StoreInst &I)
1111
+ {
1112
+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
1113
+ {
1114
+ m_LocalStoresToRemove.push_back (&I);
1115
+ }
1116
+ }
1117
+
1118
+ bool TrivialLocalMemoryOpsElimination::isLocalBarrier (CallInst &I)
1119
+ {
1120
+ // check arguments in call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) if match to
1121
+ // (i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) it is local barrier
1122
+ std::vector<bool > argumentsOfMemoryBarrier;
1123
+
1124
+ for (auto arg = I.arg_begin (); arg != I.arg_end (); ++arg)
1125
+ {
1126
+ ConstantInt* ci = dyn_cast<ConstantInt>(arg);
1127
+ argumentsOfMemoryBarrier.push_back (ci->getValue ().getBoolValue ());
1128
+ }
1129
+
1130
+ return argumentsOfMemoryBarrier == m_argumentsOfLocalMemoryBarrier;
1131
+ }
1132
+
1133
+ // If any call instruction use pointer to local memory abort pass execution
1134
+ void TrivialLocalMemoryOpsElimination::anyCallInstUseLocalMemory (CallInst &I)
1135
+ {
1136
+ Function* fn = I.getCalledFunction ();
1137
+
1138
+ if (fn != NULL )
1139
+ {
1140
+ for (auto arg = fn->arg_begin (); arg != fn->arg_end (); ++arg)
1141
+ {
1142
+ if (arg->getType ()->isPointerTy ())
1143
+ {
1144
+ if (arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_LOCAL || arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_GENERIC) abortPass = true ;
1145
+ }
1146
+ }
1147
+ }
1148
+ }
1149
+
1150
+ void TrivialLocalMemoryOpsElimination::visitCallInst (CallInst &I)
1151
+ {
1152
+ // detect only: llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
1153
+ // (note: the first and last arguments are true)
1154
+ // and add them with immediately following barriers to m_LocalFencesBariersToRemove
1155
+ anyCallInstUseLocalMemory (I);
1156
+
1157
+ if (isa<GenIntrinsicInst>(I))
1158
+ {
1159
+ GenIntrinsicInst *II = dyn_cast<GenIntrinsicInst>(&I);
1160
+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_memoryfence)
1161
+ {
1162
+ if (isLocalBarrier (I))
1163
+ {
1164
+ m_LocalFencesBariersToRemove.push_back (&I);
1165
+ findNextThreadGroupBarrierInst (I);
1166
+ }
1167
+ }
1168
+ }
1169
+
1170
+ }
1171
+ #endif
1172
+
1031
1173
// Register pass to igc-opt
1032
1174
#define PASS_FLAG2 " igc-gen-specific-pattern"
1033
1175
#define PASS_DESCRIPTION2 " LastPatternMatch Pass"
0 commit comments