@@ -1949,6 +1949,99 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
1949
1949
}
1950
1950
}
1951
1951
1952
+ // / This function calculates the array/pointer offset for map data provided
1953
+ // / with bounds operations, e.g. when provided something like the following:
1954
+ // /
1955
+ // / Fortran
1956
+ // / map(tofrom: array(2:5, 3:2))
1957
+ // / or
1958
+ // / C++
1959
+ // / map(tofrom: array[1:4][2:3])
1960
+ // / We must calculate the initial pointer offset to pass across, this function
1961
+ // / performs this using bounds.
1962
+ // /
1963
+ // / NOTE: which while specified in row-major order it currently needs to be
1964
+ // / flipped for Fortran's column order array allocation and access (as
1965
+ // / opposed to C++'s row-major, hence the backwards processing where order is
1966
+ // / important). This is likely important to keep in mind for the future when
1967
+ // / we incorporate a C++ frontend, both frontends will need to agree on the
1968
+ // / ordering of generated bounds operations (one may have to flip them) to
1969
+ // / make the below lowering frontend agnostic. The offload size
1970
+ // / calcualtion may also have to be adjusted for C++.
1971
+ std::vector<llvm::Value *>
1972
+ calculateBoundsOffset (LLVM::ModuleTranslation &moduleTranslation,
1973
+ llvm::IRBuilderBase &builder, bool isArrayTy,
1974
+ mlir::OperandRange bounds) {
1975
+ std::vector<llvm::Value *> idx;
1976
+ // There's no bounds to calculate an offset from, we can safely
1977
+ // ignore and return no indices.
1978
+ if (bounds.empty ())
1979
+ return idx;
1980
+
1981
+ // If we have an array type, then we have its type so can treat it as a
1982
+ // normal GEP instruction where the bounds operations are simply indexes
1983
+ // into the array. We currently do reverse order of the bounds, which
1984
+ // I believe leans more towards Fortran's column-major in memory.
1985
+ if (isArrayTy) {
1986
+ idx.push_back (builder.getInt64 (0 ));
1987
+ for (int i = bounds.size () - 1 ; i >= 0 ; --i) {
1988
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
1989
+ bounds[i].getDefiningOp ())) {
1990
+ idx.push_back (moduleTranslation.lookupValue (boundOp.getLowerBound ()));
1991
+ }
1992
+ }
1993
+ } else {
1994
+ // If we do not have an array type, but we have bounds, then we're dealing
1995
+ // with a pointer that's being treated like an array and we have the
1996
+ // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
1997
+ // address (pointer pointing to the actual data) so we must caclulate the
1998
+ // offset using a single index which the following two loops attempts to
1999
+ // compute.
2000
+
2001
+ // Calculates the size offset we need to make per row e.g. first row or
2002
+ // column only needs to be offset by one, but the next would have to be
2003
+ // the previous row/column offset multiplied by the extent of current row.
2004
+ //
2005
+ // For example ([1][10][100]):
2006
+ //
2007
+ // - First row/column we move by 1 for each index increment
2008
+ // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
2009
+ // current) for 10 for each index increment
2010
+ // - Third row/column we would move by 10 (second row/column) *
2011
+ // (extent/size of current) 100 for 1000 for each index increment
2012
+ std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64 (1 )};
2013
+ for (size_t i = 1 ; i < bounds.size (); ++i) {
2014
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2015
+ bounds[i].getDefiningOp ())) {
2016
+ dimensionIndexSizeOffset.push_back (builder.CreateMul (
2017
+ moduleTranslation.lookupValue (boundOp.getExtent ()),
2018
+ dimensionIndexSizeOffset[i - 1 ]));
2019
+ }
2020
+ }
2021
+
2022
+ // Now that we have calculated how much we move by per index, we must
2023
+ // multiply each lower bound offset in indexes by the size offset we
2024
+ // have calculated in the previous and accumulate the results to get
2025
+ // our final resulting offset.
2026
+ for (int i = bounds.size () - 1 ; i >= 0 ; --i) {
2027
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2028
+ bounds[i].getDefiningOp ())) {
2029
+ if (idx.empty ())
2030
+ idx.emplace_back (builder.CreateMul (
2031
+ moduleTranslation.lookupValue (boundOp.getLowerBound ()),
2032
+ dimensionIndexSizeOffset[i]));
2033
+ else
2034
+ idx.back () = builder.CreateAdd (
2035
+ idx.back (), builder.CreateMul (moduleTranslation.lookupValue (
2036
+ boundOp.getLowerBound ()),
2037
+ dimensionIndexSizeOffset[i]));
2038
+ }
2039
+ }
2040
+ }
2041
+
2042
+ return idx;
2043
+ }
2044
+
1952
2045
// This creates two insertions into the MapInfosTy data structure for the
1953
2046
// "parent" of a set of members, (usually a container e.g.
1954
2047
// class/structure/derived type) when subsequent members have also been
@@ -2060,55 +2153,7 @@ static void processMapMembersWithParent(
2060
2153
LLVM::createMappingInformation (memberClause.getLoc (), ompBuilder));
2061
2154
2062
2155
combinedInfo.BasePointers .emplace_back (mapData.BasePointers [memberDataIdx]);
2063
-
2064
- std::vector<llvm::Value *> idx{builder.getInt64 (0 )};
2065
- llvm::Value *offsetAddress = nullptr ;
2066
- if (!memberClause.getBounds ().empty ()) {
2067
- if (mapData.BaseType [memberDataIdx]->isArrayTy ()) {
2068
- for (int i = memberClause.getBounds ().size () - 1 ; i >= 0 ; --i) {
2069
- if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2070
- memberClause.getBounds ()[i].getDefiningOp ())) {
2071
- idx.push_back (
2072
- moduleTranslation.lookupValue (boundOp.getLowerBound ()));
2073
- }
2074
- }
2075
- } else {
2076
- std::vector<llvm::Value *> dimensionIndexSizeOffset{
2077
- builder.getInt64 (1 )};
2078
- for (size_t i = 1 ; i < memberClause.getBounds ().size (); ++i) {
2079
- if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2080
- memberClause.getBounds ()[i].getDefiningOp ())) {
2081
- dimensionIndexSizeOffset.push_back (builder.CreateMul (
2082
- moduleTranslation.lookupValue (boundOp.getExtent ()),
2083
- dimensionIndexSizeOffset[i - 1 ]));
2084
- }
2085
- }
2086
-
2087
- for (int i = memberClause.getBounds ().size () - 1 ; i >= 0 ; --i) {
2088
- if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2089
- memberClause.getBounds ()[i].getDefiningOp ())) {
2090
- if (!offsetAddress)
2091
- offsetAddress = builder.CreateMul (
2092
- moduleTranslation.lookupValue (boundOp.getLowerBound ()),
2093
- dimensionIndexSizeOffset[i]);
2094
- else
2095
- offsetAddress = builder.CreateAdd (
2096
- offsetAddress,
2097
- builder.CreateMul (
2098
- moduleTranslation.lookupValue (boundOp.getLowerBound ()),
2099
- dimensionIndexSizeOffset[i]));
2100
- }
2101
- }
2102
- }
2103
- }
2104
-
2105
- llvm::Value *memberIdx =
2106
- builder.CreateLoad (builder.getPtrTy (), mapData.Pointers [memberDataIdx]);
2107
- memberIdx = builder.CreateInBoundsGEP (
2108
- mapData.BaseType [memberDataIdx], memberIdx,
2109
- offsetAddress ? std::vector<llvm::Value *>{offsetAddress} : idx,
2110
- " member_idx" );
2111
- combinedInfo.Pointers .emplace_back (memberIdx);
2156
+ combinedInfo.Pointers .emplace_back (mapData.Pointers [memberDataIdx]);
2112
2157
combinedInfo.Sizes .emplace_back (mapData.Sizes [memberDataIdx]);
2113
2158
}
2114
2159
}
@@ -2126,6 +2171,77 @@ static void processMapWithMembersOf(
2126
2171
memberOfParentFlag);
2127
2172
}
2128
2173
2174
+ // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2175
+ // generates different operation (e.g. load/store) combinations for
2176
+ // arguments to the kernel, based on map capture kinds which are then
2177
+ // utilised in the combinedInfo in place of the original Map value.
2178
+ static void
2179
+ createAlteredByCaptureMap (MapInfoData &mapData,
2180
+ LLVM::ModuleTranslation &moduleTranslation,
2181
+ llvm::IRBuilderBase &builder) {
2182
+ for (size_t i = 0 ; i < mapData.MapClause .size (); ++i) {
2183
+ // if it's declare target, skip it, it's handled seperately.
2184
+ if (!mapData.IsDeclareTarget [i]) {
2185
+ auto mapOp =
2186
+ mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause [i]);
2187
+ mlir::omp::VariableCaptureKind captureKind =
2188
+ mapOp.getMapCaptureType ().value_or (
2189
+ mlir::omp::VariableCaptureKind::ByRef);
2190
+ bool isPtrTy = checkIfPointerMap (
2191
+ llvm::omp::OpenMPOffloadMappingFlags (mapOp.getMapType ().value ()));
2192
+
2193
+ // Currently handles array sectioning lowerbound case, but more
2194
+ // logic may be required in the future. Clang invokes EmitLValue,
2195
+ // which has specialised logic for special Clang types such as user
2196
+ // defines, so it is possible we will have to extend this for
2197
+ // structures or other complex types. As the general idea is that this
2198
+ // function mimics some of the logic from Clang that we require for
2199
+ // kernel argument passing from host -> device.
2200
+ switch (captureKind) {
2201
+ case mlir::omp::VariableCaptureKind::ByRef: {
2202
+ llvm::Value *newV = mapData.Pointers [i];
2203
+ std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset (
2204
+ moduleTranslation, builder, mapData.BaseType [i]->isArrayTy (),
2205
+ mapOp.getBounds ());
2206
+ if (isPtrTy)
2207
+ newV = builder.CreateLoad (builder.getPtrTy (), newV);
2208
+
2209
+ if (!offsetIdx.empty ())
2210
+ newV = builder.CreateInBoundsGEP (mapData.BaseType [i], newV, offsetIdx,
2211
+ " array_offset" );
2212
+ mapData.Pointers [i] = newV;
2213
+ } break ;
2214
+ case mlir::omp::VariableCaptureKind::ByCopy: {
2215
+ llvm::Type *type = mapData.BaseType [i];
2216
+ llvm::Value *newV;
2217
+ if (mapData.Pointers [i]->getType ()->isPointerTy ())
2218
+ newV = builder.CreateLoad (type, mapData.Pointers [i]);
2219
+ else
2220
+ newV = mapData.Pointers [i];
2221
+
2222
+ if (!isPtrTy) {
2223
+ auto curInsert = builder.saveIP ();
2224
+ builder.restoreIP (findAllocaInsertPoint (builder, moduleTranslation));
2225
+ auto *memTempAlloc =
2226
+ builder.CreateAlloca (builder.getPtrTy (), nullptr , " .casted" );
2227
+ builder.restoreIP (curInsert);
2228
+
2229
+ builder.CreateStore (newV, memTempAlloc);
2230
+ newV = builder.CreateLoad (builder.getPtrTy (), memTempAlloc);
2231
+ }
2232
+
2233
+ mapData.Pointers [i] = newV;
2234
+ mapData.BasePointers [i] = newV;
2235
+ } break ;
2236
+ case mlir::omp::VariableCaptureKind::This:
2237
+ case mlir::omp::VariableCaptureKind::VLAType:
2238
+ mapData.MapClause [i]->emitOpError (" Unhandled capture kind" );
2239
+ break ;
2240
+ }
2241
+ }
2242
+ }
2243
+ }
2244
+
2129
2245
// Generate all map related information and fill the combinedInfo.
2130
2246
static void genMapInfos (llvm::IRBuilderBase &builder,
2131
2247
LLVM::ModuleTranslation &moduleTranslation,
@@ -2135,6 +2251,20 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
2135
2251
const SmallVector<Value> &devPtrOperands = {},
2136
2252
const SmallVector<Value> &devAddrOperands = {},
2137
2253
bool isTargetParams = false ) {
2254
+ // We wish to modify some of the methods in which arguments are
2255
+ // passed based on their capture type by the target region, this can
2256
+ // involve generating new loads and stores, which changes the
2257
+ // MLIR value to LLVM value mapping, however, we only wish to do this
2258
+ // locally for the current function/target and also avoid altering
2259
+ // ModuleTranslation, so we remap the base pointer or pointer stored
2260
+ // in the map infos corresponding MapInfoData, which is later accessed
2261
+ // by genMapInfos and createTarget to help generate the kernel and
2262
+ // kernel arg structure. It primarily becomes relevant in cases like
2263
+ // bycopy, or byref range'd arrays. In the default case, we simply
2264
+ // pass thee pointer byref as both basePointer and pointer.
2265
+ if (!moduleTranslation.getOpenMPBuilder ()->Config .isTargetDevice ())
2266
+ createAlteredByCaptureMap (mapData, moduleTranslation, builder);
2267
+
2138
2268
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder ();
2139
2269
2140
2270
auto fail = [&combinedInfo]() -> void {
@@ -2627,90 +2757,6 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
2627
2757
return builder.saveIP ();
2628
2758
}
2629
2759
2630
- // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2631
- // generates different operation (e.g. load/store) combinations for
2632
- // arguments to the kernel, based on map capture kinds which are then
2633
- // utilised in the combinedInfo in place of the original Map value.
2634
- static void
2635
- createAlteredByCaptureMap (MapInfoData &mapData,
2636
- LLVM::ModuleTranslation &moduleTranslation,
2637
- llvm::IRBuilderBase &builder) {
2638
- for (size_t i = 0 ; i < mapData.MapClause .size (); ++i) {
2639
- // if it's declare target, skip it, it's handled seperately.
2640
- if (!mapData.IsDeclareTarget [i]) {
2641
- mlir::omp::VariableCaptureKind captureKind =
2642
- mlir::omp::VariableCaptureKind::ByRef;
2643
-
2644
- bool isPtrTy = false ;
2645
- if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2646
- mapData.MapClause [i])) {
2647
- captureKind = mapOp.getMapCaptureType ().value_or (
2648
- mlir::omp::VariableCaptureKind::ByRef);
2649
-
2650
- isPtrTy = checkIfPointerMap (
2651
- llvm::omp::OpenMPOffloadMappingFlags (mapOp.getMapType ().value ()));
2652
- }
2653
-
2654
- switch (captureKind) {
2655
- case mlir::omp::VariableCaptureKind::ByRef: {
2656
- // Currently handles array sectioning lowerbound case, but more
2657
- // logic may be required in the future. Clang invokes EmitLValue,
2658
- // which has specialised logic for special Clang types such as user
2659
- // defines, so it is possible we will have to extend this for
2660
- // structures or other complex types. As the general idea is that this
2661
- // function mimics some of the logic from Clang that we require for
2662
- // kernel argument passing from host -> device.
2663
- if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2664
- mapData.MapClause [i])) {
2665
- if (!mapOp.getBounds ().empty () && mapData.BaseType [i]->isArrayTy ()) {
2666
-
2667
- std::vector<llvm::Value *> idx =
2668
- std::vector<llvm::Value *>{builder.getInt64 (0 )};
2669
- for (int i = mapOp.getBounds ().size () - 1 ; i >= 0 ; --i) {
2670
- if (auto boundOp =
2671
- mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2672
- mapOp.getBounds ()[i].getDefiningOp ())) {
2673
- idx.push_back (
2674
- moduleTranslation.lookupValue (boundOp.getLowerBound ()));
2675
- }
2676
- }
2677
-
2678
- mapData.Pointers [i] = builder.CreateInBoundsGEP (
2679
- mapData.BaseType [i], mapData.Pointers [i], idx);
2680
- }
2681
- }
2682
- } break ;
2683
- case mlir::omp::VariableCaptureKind::ByCopy: {
2684
- llvm::Type *type = mapData.BaseType [i];
2685
- llvm::Value *newV;
2686
- if (mapData.Pointers [i]->getType ()->isPointerTy ())
2687
- newV = builder.CreateLoad (type, mapData.Pointers [i]);
2688
- else
2689
- newV = mapData.Pointers [i];
2690
-
2691
- if (!isPtrTy) {
2692
- auto curInsert = builder.saveIP ();
2693
- builder.restoreIP (findAllocaInsertPoint (builder, moduleTranslation));
2694
- auto *memTempAlloc =
2695
- builder.CreateAlloca (builder.getPtrTy (), nullptr , " .casted" );
2696
- builder.restoreIP (curInsert);
2697
-
2698
- builder.CreateStore (newV, memTempAlloc);
2699
- newV = builder.CreateLoad (builder.getPtrTy (), memTempAlloc);
2700
- }
2701
-
2702
- mapData.Pointers [i] = newV;
2703
- mapData.BasePointers [i] = newV;
2704
- } break ;
2705
- case mlir::omp::VariableCaptureKind::This:
2706
- case mlir::omp::VariableCaptureKind::VLAType:
2707
- mapData.MapClause [i]->emitOpError (" Unhandled capture kind" );
2708
- break ;
2709
- }
2710
- }
2711
- }
2712
- }
2713
-
2714
2760
static LogicalResult
2715
2761
convertOmpTarget (Operation &opInst, llvm::IRBuilderBase &builder,
2716
2762
LLVM::ModuleTranslation &moduleTranslation) {
@@ -2779,20 +2825,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
2779
2825
collectMapDataFromMapOperands (mapData, mapOperands, moduleTranslation, dl,
2780
2826
builder);
2781
2827
2782
- // We wish to modify some of the methods in which kernel arguments are
2783
- // passed based on their capture type by the target region, this can
2784
- // involve generating new loads and stores, which changes the
2785
- // MLIR value to LLVM value mapping, however, we only wish to do this
2786
- // locally for the current function/target and also avoid altering
2787
- // ModuleTranslation, so we remap the base pointer or pointer stored
2788
- // in the map infos corresponding MapInfoData, which is later accessed
2789
- // by genMapInfos and createTarget to help generate the kernel and
2790
- // kernel arg structure. It primarily becomes relevant in cases like
2791
- // bycopy, or byref range'd arrays. In the default case, we simply
2792
- // pass thee pointer byref as both basePointer and pointer.
2793
- if (!moduleTranslation.getOpenMPBuilder ()->Config .isTargetDevice ())
2794
- createAlteredByCaptureMap (mapData, moduleTranslation, builder);
2795
-
2796
2828
llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
2797
2829
auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
2798
2830
-> llvm::OpenMPIRBuilder::MapInfosTy & {
0 commit comments