@@ -862,9 +862,10 @@ bool mlir::affine::hasNoInterveningEffect(Operation *start, T memOp) {
862
862
/// other operations will overwrite the memory loaded between the given load
863
863
/// and store. If such a value exists, the replaced `loadOp` will be added to
864
864
/// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
865
- static LogicalResult forwardStoreToLoad (
866
- AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
867
- SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo) {
865
+ static void forwardStoreToLoad (AffineReadOpInterface loadOp,
866
+ SmallVectorImpl<Operation *> &loadOpsToErase,
867
+ SmallPtrSetImpl<Value> &memrefsToErase,
868
+ DominanceInfo &domInfo) {
868
869
869
870
// The store op candidate for forwarding that satisfies all conditions
870
871
// to replace the load, if any.
@@ -911,21 +912,20 @@ static LogicalResult forwardStoreToLoad(
911
912
}
912
913
913
914
if (!lastWriteStoreOp)
914
- return failure () ;
915
+ return ;
915
916
916
917
// Perform the actual store to load forwarding.
917
918
Value storeVal =
918
919
cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore ();
919
920
// Check if 2 values have the same shape. This is needed for affine vector
920
921
// loads and stores.
921
922
if (storeVal.getType () != loadOp.getValue ().getType ())
922
- return failure () ;
923
+ return ;
923
924
loadOp.getValue ().replaceAllUsesWith (storeVal);
924
925
// Record the memref for a later sweep to optimize away.
925
926
memrefsToErase.insert (loadOp.getMemRef ());
926
927
// Record this to erase later.
927
928
loadOpsToErase.push_back (loadOp);
928
- return success ();
929
929
}
930
930
931
931
template bool
@@ -995,16 +995,16 @@ static void loadCSE(AffineReadOpInterface loadA,
995
995
MemRefAccess srcAccess (loadB);
996
996
MemRefAccess destAccess (loadA);
997
997
998
- // 1. The accesses have to be to the same location.
998
+ // 1. The accesses should be to the same location.
999
999
if (srcAccess != destAccess) {
1000
1000
continue ;
1001
1001
}
1002
1002
1003
- // 2. The store has to dominate the load op to be candidate .
1003
+ // 2. loadB should dominate loadA .
1004
1004
if (!domInfo.dominates (loadB, loadA))
1005
1005
continue ;
1006
1006
1007
- // 3. There is no write between loadA and loadB.
1007
+ // 3. There should not be a write between loadA and loadB.
1008
1008
if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
1009
1009
loadB.getOperation (), loadA))
1010
1010
continue ;
@@ -1073,13 +1073,8 @@ void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1073
1073
1074
1074
// Walk all load's and perform store to load forwarding.
1075
1075
f.walk ([&](AffineReadOpInterface loadOp) {
1076
- if (failed (
1077
- forwardStoreToLoad (loadOp, opsToErase, memrefsToErase, domInfo))) {
1078
- loadCSE (loadOp, opsToErase, domInfo);
1079
- }
1076
+ forwardStoreToLoad (loadOp, opsToErase, memrefsToErase, domInfo);
1080
1077
});
1081
-
1082
- // Erase all load op's whose results were replaced with store fwd'ed ones.
1083
1078
for (auto *op : opsToErase)
1084
1079
op->erase ();
1085
1080
opsToErase.clear ();
@@ -1088,9 +1083,9 @@ void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1088
1083
f.walk ([&](AffineWriteOpInterface storeOp) {
1089
1084
findUnusedStore (storeOp, opsToErase, postDomInfo);
1090
1085
});
1091
- // Erase all store op's which don't impact the program
1092
1086
for (auto *op : opsToErase)
1093
1087
op->erase ();
1088
+ opsToErase.clear ();
1094
1089
1095
1090
// Check if the store fwd'ed memrefs are now left with only stores and
1096
1091
// deallocs and can thus be completely deleted. Note: the canonicalize pass
@@ -1114,6 +1109,15 @@ void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1114
1109
user->erase ();
1115
1110
defOp->erase ();
1116
1111
}
1112
+
1113
+ // To eliminate as many loads as possible, run load CSE after eliminating
1114
+ // stores. Otherwise, some stores are wrongly seen as having an intervening
1115
+ // effect.
1116
+ f.walk ([&](AffineReadOpInterface loadOp) {
1117
+ loadCSE (loadOp, opsToErase, domInfo);
1118
+ });
1119
+ for (auto *op : opsToErase)
1120
+ op->erase ();
1117
1121
}
1118
1122
1119
1123
// Perform the replacement in `op`.
0 commit comments