Skip to content

Commit a711b04

Browse files
[acc] Initial implementation of MemoryEffects on acc operations (#75970)
The `acc` dialect operations now implement MemoryEffects interfaces in the following ways: - Data entry operations which may read host memory via `varPtr` are now marked as such. The majority of them do NOT actually read the host memory. For example, `acc.present` works on the basis of the presence of the pointer and not necessarily what the pointer points to - so such operations are not marked as reading the host memory. They still use `varPtr`, but this dependency is reflected through SSA. - Data clause operations which may mutate the data pointed to by `accPtr` are marked as doing so. - Data clause operations which update required structured or dynamic runtime counters are marked as reading and writing the newly defined `RuntimeCounters` resource. Some operations, like `acc.getdeviceptr`, do not actually use the runtime counters - but are marked as reading them since the address obtained depends on the mapping operations which do update the runtime counters. Namely, `acc.getdeviceptr` cannot be moved across other mapping operations. - Constructs are marked as writing to the `ConstructResource`. This may be too strict but is needed for the following reasons: 1) Structured constructs may not use `accPtr` and instead use `varPtr` - when this is the case, data actions may be removed even when used. 2) Unstructured constructs are currently used to aggregate multiple data actions. We do not want such constructs removed or moved for now. - Terminators are marked as `Pure` as in other dialects. The current approach has the following limitations which may require further improvements: - Subsequent `acc.copyin` operations on the same data do not actually read host memory pointed to by `varPtr` but are still marked as such. - Two `acc.delete` operations on the same data may not mutate `accPtr` until the runtime counters are zero (but are still marked as mutating). - The `varPtrPtr` argument, when present, points to the address of the location of `varPtr`. 
When mapping to the target device, an `accPtrPtr` needs to be computed and this memory is mutated. This effect is not captured since the current operations do not produce `accPtrPtr`. - Runtime counter effects are imprecise since two operations with differing `varPtr` increment/decrement different counters. Additionally, operations with `varPtrPtr` mutate attachment counters. - The `ConstructResource` is too strict and likely can be relaxed with better modeling.
1 parent 476812a commit a711b04

File tree

4 files changed

+307
-82
lines changed

4 files changed

+307
-82
lines changed

flang/lib/Lower/OpenACC.cpp

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -218,14 +218,18 @@ static void createDeclareDeallocFuncWithArg(
218218
builder.create<mlir::acc::DeclareExitOp>(
219219
loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr()));
220220

221-
mlir::Value varPtr;
222221
if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
223222
std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
224-
varPtr = entryOp.getVarPtr();
225-
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
226-
entryOp.getBounds(), entryOp.getDataClause(),
227-
/*structured=*/false, /*implicit=*/false,
228-
builder.getStringAttr(*entryOp.getName()));
223+
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
224+
entryOp.getVarPtr(), entryOp.getBounds(),
225+
entryOp.getDataClause(),
226+
/*structured=*/false, /*implicit=*/false,
227+
builder.getStringAttr(*entryOp.getName()));
228+
else
229+
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
230+
entryOp.getBounds(), entryOp.getDataClause(),
231+
/*structured=*/false, /*implicit=*/false,
232+
builder.getStringAttr(*entryOp.getName()));
229233

230234
// Generate the post dealloc function.
231235
modBuilder.setInsertionPointAfter(preDeallocOp);
@@ -368,14 +372,17 @@ static void genDataExitOperations(fir::FirOpBuilder &builder,
368372
for (mlir::Value operand : operands) {
369373
auto entryOp = mlir::dyn_cast_or_null<EntryOp>(operand.getDefiningOp());
370374
assert(entryOp && "data entry op expected");
371-
mlir::Value varPtr;
372375
if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
373376
std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
374-
varPtr = entryOp.getVarPtr();
375-
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
376-
entryOp.getBounds(), entryOp.getDataClause(),
377-
structured, entryOp.getImplicit(),
378-
builder.getStringAttr(*entryOp.getName()));
377+
builder.create<ExitOp>(
378+
entryOp.getLoc(), entryOp.getAccPtr(), entryOp.getVarPtr(),
379+
entryOp.getBounds(), entryOp.getDataClause(), structured,
380+
entryOp.getImplicit(), builder.getStringAttr(*entryOp.getName()));
381+
else
382+
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
383+
entryOp.getBounds(), entryOp.getDataClause(),
384+
structured, entryOp.getImplicit(),
385+
builder.getStringAttr(*entryOp.getName()));
379386
}
380387
}
381388

@@ -2840,9 +2847,8 @@ static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder,
28402847
else
28412848
builder.create<DeclareOp>(loc, mlir::Value{},
28422849
mlir::ValueRange(entryOp.getAccPtr()));
2843-
mlir::Value varPtr;
28442850
if constexpr (std::is_same_v<GlobalOp, mlir::acc::GlobalDestructorOp>) {
2845-
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
2851+
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
28462852
entryOp.getBounds(), entryOp.getDataClause(),
28472853
/*structured=*/false, /*implicit=*/false,
28482854
builder.getStringAttr(*entryOp.getName()));
@@ -2930,14 +2936,18 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
29302936
builder.create<mlir::acc::DeclareExitOp>(
29312937
loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr()));
29322938

2933-
mlir::Value varPtr;
29342939
if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
29352940
std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
2936-
varPtr = entryOp.getVarPtr();
2937-
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
2938-
entryOp.getBounds(), entryOp.getDataClause(),
2939-
/*structured=*/false, /*implicit=*/false,
2940-
builder.getStringAttr(*entryOp.getName()));
2941+
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
2942+
entryOp.getVarPtr(), entryOp.getBounds(),
2943+
entryOp.getDataClause(),
2944+
/*structured=*/false, /*implicit=*/false,
2945+
builder.getStringAttr(*entryOp.getName()));
2946+
else
2947+
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
2948+
entryOp.getBounds(), entryOp.getDataClause(),
2949+
/*structured=*/false, /*implicit=*/false,
2950+
builder.getStringAttr(*entryOp.getName()));
29412951

29422952
// Generate the post dealloc function.
29432953
modBuilder.setInsertionPointAfter(preDeallocOp);

mlir/include/mlir/Dialect/OpenACC/OpenACC.h

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,22 @@
4646
mlir::acc::UseDeviceOp, mlir::acc::ReductionOp, \
4747
mlir::acc::DeclareDeviceResidentOp, mlir::acc::DeclareLinkOp, \
4848
mlir::acc::CacheOp
49+
#define ACC_DATA_EXIT_OPS \
50+
mlir::acc::CopyoutOp, mlir::acc::DeleteOp, mlir::acc::DetachOp, \
51+
mlir::acc::UpdateHostOp
52+
#define ACC_DATA_CLAUSE_OPS ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS
4953
#define ACC_COMPUTE_CONSTRUCT_OPS \
5054
mlir::acc::ParallelOp, mlir::acc::KernelsOp, mlir::acc::SerialOp
5155
#define ACC_COMPUTE_CONSTRUCT_AND_LOOP_OPS \
5256
ACC_COMPUTE_CONSTRUCT_OPS, mlir::acc::LoopOp
57+
#define OPENACC_DATA_CONSTRUCT_STRUCTURED_OPS \
58+
mlir::acc::DataOp, mlir::acc::DeclareOp
59+
#define ACC_DATA_CONSTRUCT_UNSTRUCTURED_OPS \
60+
mlir::acc::EnterDataOp, mlir::acc::ExitDataOp, mlir::acc::UpdateOp, \
61+
mlir::acc::HostDataOp, mlir::acc::DeclareEnterOp, \
62+
mlir::acc::DeclareExitOp
5363
#define ACC_DATA_CONSTRUCT_OPS \
54-
mlir::acc::DataOp, mlir::acc::EnterDataOp, mlir::acc::ExitDataOp, \
55-
mlir::acc::UpdateOp, mlir::acc::HostDataOp, mlir::acc::DeclareEnterOp, \
56-
mlir::acc::DeclareExitOp, mlir::acc::DeclareOp
64+
OPENACC_DATA_CONSTRUCT_STRUCTURED_OPS, ACC_DATA_CONSTRUCT_UNSTRUCTURED_OPS
5765
#define ACC_COMPUTE_AND_DATA_CONSTRUCT_OPS \
5866
ACC_COMPUTE_CONSTRUCT_OPS, ACC_DATA_CONSTRUCT_OPS
5967
#define ACC_COMPUTE_LOOP_AND_DATA_CONSTRUCT_OPS \
@@ -73,9 +81,27 @@ namespace acc {
7381
/// combined and the final mapping value would be 5 (4 | 1).
7482
enum OpenACCExecMapping { NONE = 0, VECTOR = 1, WORKER = 2, GANG = 4 };
7583

76-
/// Used to obtain the `varPtr` from a data entry operation.
77-
/// Returns empty value if not a data entry operation.
78-
mlir::Value getVarPtr(mlir::Operation *accDataEntryOp);
84+
/// Used to obtain the `varPtr` from a data clause operation.
85+
/// Returns empty value if not a data clause operation or is a data exit
86+
/// operation with no `varPtr`.
87+
mlir::Value getVarPtr(mlir::Operation *accDataClauseOp);
88+
89+
/// Used to obtain the `accPtr` from a data clause operation.
90+
/// When a data entry operation, it obtains its result `accPtr` value.
91+
/// If a data exit operation, it obtains its operand `accPtr` value.
92+
/// Returns empty value if not a data clause operation.
93+
mlir::Value getAccPtr(mlir::Operation *accDataClauseOp);
94+
95+
/// Used to obtain the `varPtrPtr` from a data clause operation.
96+
/// Returns empty value if not a data clause operation.
97+
mlir::Value getVarPtrPtr(mlir::Operation *accDataClauseOp);
98+
99+
/// Used to obtain `bounds` from an acc data clause operation.
100+
/// Returns an empty vector if there are no bounds.
101+
mlir::SmallVector<mlir::Value> getBounds(mlir::Operation *accDataClauseOp);
102+
103+
/// Used to obtain the `name` from an acc operation.
104+
std::optional<llvm::StringRef> getVarName(mlir::Operation *accOp);
79105

80106
/// Used to obtain the `dataClause` from a data entry operation.
81107
/// Returns empty optional if not a data entry operation.
@@ -87,6 +113,12 @@ getDataClause(mlir::Operation *accDataEntryOp);
87113
/// implicit flag.
88114
bool getImplicitFlag(mlir::Operation *accDataEntryOp);
89115

116+
/// Used to get an immutable range iterating over the data operands.
117+
mlir::ValueRange getDataOperands(mlir::Operation *accOp);
118+
119+
/// Used to get a mutable range iterating over the data operands.
120+
mlir::MutableOperandRange getMutableDataOperands(mlir::Operation *accOp);
121+
90122
/// Used to obtain the attribute name for declare.
91123
static constexpr StringLiteral getDeclareAttrName() {
92124
return StringLiteral("acc.declare");
@@ -100,6 +132,16 @@ static constexpr StringLiteral getRoutineInfoAttrName() {
100132
return StringLiteral("acc.routine_info");
101133
}
102134

135+
struct RuntimeCounters
136+
: public mlir::SideEffects::Resource::Base<RuntimeCounters> {
137+
mlir::StringRef getName() final { return "AccRuntimeCounters"; }
138+
};
139+
140+
struct ConstructResource
141+
: public mlir::SideEffects::Resource::Base<ConstructResource> {
142+
mlir::StringRef getName() final { return "AccConstructResource"; }
143+
};
144+
103145
} // namespace acc
104146
} // namespace mlir
105147

0 commit comments

Comments
 (0)