|
| 1 | +//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +#include "PrivateReductionUtils.h" |
| 14 | + |
| 15 | +#include "flang/Optimizer/Builder/FIRBuilder.h" |
| 16 | +#include "flang/Optimizer/Builder/HLFIRTools.h" |
| 17 | +#include "flang/Optimizer/Builder/Todo.h" |
| 18 | +#include "flang/Optimizer/HLFIR/HLFIROps.h" |
| 19 | +#include "flang/Optimizer/Support/FatalError.h" |
| 20 | +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| 21 | +#include "mlir/IR/Location.h" |
| 22 | + |
/// Populate \p cleanupRegion with code that deallocates the heap-allocated
/// private/reduction copy received as the region's single block argument.
///
/// Only boxed values are supported: the box payload is freed with
/// fir.freemem when its address is non-null. Any other type aborts
/// compilation via \c typeError, since nothing was heap-allocated for it.
static void createCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::Type argType,
                                mlir::Region &cleanupRegion) {
  assert(cleanupRegion.empty());
  // Single-block region taking the value to clean up as its argument.
  mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(),
                                           {argType}, {loc});
  builder.setInsertionPointToEnd(block);

  auto typeError = [loc]() {
    fir::emitFatalError(loc,
                        "Attempt to create an omp cleanup region "
                        "for a type that wasn't allocated",
                        /*genCrashDiag=*/true);
  };

  mlir::Type valTy = fir::unwrapRefType(argType);
  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
    // A box that isn't a heap/pointer box must at least wrap an array
    // (sequence) type; otherwise nothing here allocated it.
    if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) {
      mlir::Type innerTy = fir::extractSequenceType(boxTy);
      if (!mlir::isa<fir::SequenceType>(innerTy))
        typeError();
    }

    mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0));
    assert(mlir::isa<fir::BaseBoxType>(arg.getType()));

    // Deallocate box
    // The FIR type system doesn't necessarily know that this is a mutable box
    // if we allocated the thread local array on the heap to avoid looped stack
    // allocations.
    mlir::Value addr =
        hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
    // Guard the free: the box may describe an unallocated allocatable.
    mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
    fir::IfOp ifOp =
        builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());

    // fir.freemem expects a !fir.heap<...> operand, so convert the raw
    // address before freeing it.
    mlir::Value cast = builder.createConvert(
        loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
    builder.create<fir::FreeMemOp>(loc, cast);

    builder.setInsertionPointAfter(ifOp);
    builder.create<mlir::omp::YieldOp>(loc);
    return;
  }

  typeError();
}
| 71 | + |
| 72 | +fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, |
| 73 | + mlir::Location loc, |
| 74 | + mlir::Value box) { |
| 75 | + fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>( |
| 76 | + hlfir::getFortranElementOrSequenceType(box.getType())); |
| 77 | + const unsigned rank = sequenceType.getDimension(); |
| 78 | + llvm::SmallVector<mlir::Value> lbAndExtents; |
| 79 | + lbAndExtents.reserve(rank * 2); |
| 80 | + |
| 81 | + mlir::Type idxTy = builder.getIndexType(); |
| 82 | + for (unsigned i = 0; i < rank; ++i) { |
| 83 | + // TODO: ideally we want to hoist box reads out of the critical section. |
| 84 | + // We could do this by having box dimensions in block arguments like |
| 85 | + // OpenACC does |
| 86 | + mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); |
| 87 | + auto dimInfo = |
| 88 | + builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim); |
| 89 | + lbAndExtents.push_back(dimInfo.getLowerBound()); |
| 90 | + lbAndExtents.push_back(dimInfo.getExtent()); |
| 91 | + } |
| 92 | + |
| 93 | + auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); |
| 94 | + auto shapeShift = |
| 95 | + builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents); |
| 96 | + return shapeShift; |
| 97 | +} |
| 98 | + |
| 99 | +void Fortran::lower::omp::populateByRefInitAndCleanupRegions( |
| 100 | + fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argType, |
| 101 | + mlir::Value scalarInitValue, mlir::Block *initBlock, |
| 102 | + mlir::Value allocatedPrivVarArg, mlir::Value moldArg, |
| 103 | + mlir::Region &cleanupRegion) { |
| 104 | + mlir::Type ty = fir::unwrapRefType(argType); |
| 105 | + builder.setInsertionPointToEnd(initBlock); |
| 106 | + auto yield = [&](mlir::Value ret) { |
| 107 | + builder.create<mlir::omp::YieldOp>(loc, ret); |
| 108 | + }; |
| 109 | + |
| 110 | + if (fir::isa_trivial(ty)) { |
| 111 | + builder.setInsertionPointToEnd(initBlock); |
| 112 | + |
| 113 | + if (scalarInitValue) |
| 114 | + builder.createStoreWithConvert(loc, scalarInitValue, allocatedPrivVarArg); |
| 115 | + yield(allocatedPrivVarArg); |
| 116 | + return; |
| 117 | + } |
| 118 | + |
| 119 | + // check if an allocatable box is unallocated. If so, initialize the boxAlloca |
| 120 | + // to be unallocated e.g. |
| 121 | + // %box_alloca = fir.alloca !fir.box<!fir.heap<...>> |
| 122 | + // %addr = fir.box_addr %box |
| 123 | + // if (%addr == 0) { |
| 124 | + // %nullbox = fir.embox %addr |
| 125 | + // fir.store %nullbox to %box_alloca |
| 126 | + // } else { |
| 127 | + // // ... |
| 128 | + // fir.store %something to %box_alloca |
| 129 | + // } |
| 130 | + // omp.yield %box_alloca |
| 131 | + moldArg = builder.loadIfRef(loc, moldArg); |
| 132 | + auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { |
| 133 | + mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg); |
| 134 | + mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); |
| 135 | + fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated, |
| 136 | + /*withElseRegion=*/true); |
| 137 | + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
| 138 | + // just embox the null address and return |
| 139 | + mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr); |
| 140 | + builder.create<fir::StoreOp>(loc, nullBox, boxAlloca); |
| 141 | + return ifOp; |
| 142 | + }; |
| 143 | + |
| 144 | + // all arrays are boxed |
| 145 | + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) { |
| 146 | + bool isAllocatableOrPointer = |
| 147 | + mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy()); |
| 148 | + |
| 149 | + builder.setInsertionPointToEnd(initBlock); |
| 150 | + mlir::Value boxAlloca = allocatedPrivVarArg; |
| 151 | + mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); |
| 152 | + if (fir::isa_trivial(innerTy)) { |
| 153 | + // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>> |
| 154 | + if (!isAllocatableOrPointer) |
| 155 | + TODO(loc, |
| 156 | + "Reduction/Privatization of non-allocatable trivial typed box"); |
| 157 | + |
| 158 | + fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); |
| 159 | + |
| 160 | + builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); |
| 161 | + mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy); |
| 162 | + if (scalarInitValue) |
| 163 | + builder.createStoreWithConvert(loc, scalarInitValue, valAlloc); |
| 164 | + mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc); |
| 165 | + builder.create<fir::StoreOp>(loc, box, boxAlloca); |
| 166 | + |
| 167 | + createCleanupRegion(builder, loc, argType, cleanupRegion); |
| 168 | + builder.setInsertionPointAfter(ifUnallocated); |
| 169 | + yield(boxAlloca); |
| 170 | + return; |
| 171 | + } |
| 172 | + innerTy = fir::extractSequenceType(boxTy); |
| 173 | + if (!mlir::isa<fir::SequenceType>(innerTy)) |
| 174 | + TODO(loc, "Unsupported boxed type for reduction/privatization"); |
| 175 | + |
| 176 | + fir::IfOp ifUnallocated{nullptr}; |
| 177 | + if (isAllocatableOrPointer) { |
| 178 | + ifUnallocated = handleNullAllocatable(boxAlloca); |
| 179 | + builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); |
| 180 | + } |
| 181 | + |
| 182 | + // Create the private copy from the initial fir.box: |
| 183 | + mlir::Value loadedBox = builder.loadIfRef(loc, moldArg); |
| 184 | + hlfir::Entity source = hlfir::Entity{loadedBox}; |
| 185 | + |
| 186 | + // Allocating on the heap in case the whole reduction is nested inside of a |
| 187 | + // loop |
| 188 | + // TODO: compare performance here to using allocas - this could be made to |
| 189 | + // work by inserting stacksave/stackrestore around the reduction in |
| 190 | + // openmpirbuilder |
| 191 | + auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); |
| 192 | + // if needsDealloc isn't statically false, add cleanup region. Always |
| 193 | + // do this for allocatable boxes because they might have been re-allocated |
| 194 | + // in the body of the loop/parallel region |
| 195 | + |
| 196 | + std::optional<int64_t> cstNeedsDealloc = |
| 197 | + fir::getIntIfConstant(needsDealloc); |
| 198 | + assert(cstNeedsDealloc.has_value() && |
| 199 | + "createTempFromMold decides this statically"); |
| 200 | + if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { |
| 201 | + mlir::OpBuilder::InsertionGuard guard(builder); |
| 202 | + createCleanupRegion(builder, loc, argType, cleanupRegion); |
| 203 | + } else { |
| 204 | + assert(!isAllocatableOrPointer && |
| 205 | + "Pointer-like arrays must be heap allocated"); |
| 206 | + } |
| 207 | + |
| 208 | + // Put the temporary inside of a box: |
| 209 | + // hlfir::genVariableBox doesn't handle non-default lower bounds |
| 210 | + mlir::Value box; |
| 211 | + fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox); |
| 212 | + mlir::Type boxType = loadedBox.getType(); |
| 213 | + if (mlir::isa<fir::BaseBoxType>(temp.getType())) |
| 214 | + // the box created by the declare form createTempFromMold is missing lower |
| 215 | + // bounds info |
| 216 | + box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift, |
| 217 | + /*shift=*/mlir::Value{}); |
| 218 | + else |
| 219 | + box = builder.create<fir::EmboxOp>( |
| 220 | + loc, boxType, temp, shapeShift, |
| 221 | + /*slice=*/mlir::Value{}, |
| 222 | + /*typeParams=*/llvm::ArrayRef<mlir::Value>{}); |
| 223 | + |
| 224 | + if (scalarInitValue) |
| 225 | + builder.create<hlfir::AssignOp>(loc, scalarInitValue, box); |
| 226 | + builder.create<fir::StoreOp>(loc, box, boxAlloca); |
| 227 | + if (ifUnallocated) |
| 228 | + builder.setInsertionPointAfter(ifUnallocated); |
| 229 | + yield(boxAlloca); |
| 230 | + return; |
| 231 | + } |
| 232 | + |
| 233 | + TODO(loc, |
| 234 | + "creating reduction/privatization init region for unsupported type"); |
| 235 | + return; |
| 236 | +} |
0 commit comments