Skip to content

[Flang] Hoisting constant-sized allocas at flang codegen. #95310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
/// appropriate reified structures.
mlir::Value integerCast(mlir::Location loc,
mlir::ConversionPatternRewriter &rewriter,
mlir::Type ty, mlir::Value val) const;
mlir::Type ty, mlir::Value val,
bool fold = false) const;

struct TypePair {
mlir::Type fir;
mlir::Type llvm;
Expand Down Expand Up @@ -144,9 +146,12 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
// Find the Block in which the alloca should be inserted.
// The order to recursively find the proper block:
// 1. An OpenMP Op that will be outlined.
// 2. A LLVMFuncOp
// 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp
mlir::Block *getBlockForAllocaInsert(mlir::Operation *op) const;
// 2. An OpenMP or OpenACC Op with one or more regions holding executable
// code.
// 3. A LLVMFuncOp
// 4. The first ancestor that is one of the above.
mlir::Block *getBlockForAllocaInsert(mlir::Operation *op,
mlir::Region *parentRegion) const;

// Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
// allocation address space provided for the architecture in the DataLayout
Expand Down
20 changes: 16 additions & 4 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
chrTy.getContext(), chrTy.getFKind());
llvmObjectType = convertType(rawCharTy);
assert(end == 1);
size = integerCast(loc, rewriter, ity, lenParams[0]);
size = integerCast(loc, rewriter, ity, lenParams[0], /*fold=*/true);
} else if (auto recTy = mlir::dyn_cast<fir::RecordType>(scalarType)) {
mlir::LLVM::LLVMFuncOp memSizeFn =
getDependentTypeMemSizeFn(recTy, alloc, rewriter);
Expand All @@ -236,17 +236,29 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
}
}
if (auto scaleSize = genAllocationScaleSize(alloc, ity, rewriter))
size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
size =
rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
if (alloc.hasShapeOperands()) {
unsigned end = operands.size();
for (; i < end; ++i)
size = rewriter.create<mlir::LLVM::MulOp>(
loc, ity, size, integerCast(loc, rewriter, ity, operands[i]));
size = rewriter.createOrFold<mlir::LLVM::MulOp>(
loc, ity, size,
integerCast(loc, rewriter, ity, operands[i], /*fold=*/true));
}

unsigned allocaAs = getAllocaAddressSpace(rewriter);
unsigned programAs = getProgramAddressSpace(rewriter);

if (mlir::isa<mlir::LLVM::ConstantOp>(size.getDefiningOp())) {
// Set the Block in which the llvm alloca should be inserted.
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
mlir::Block *insertBlock =
getBlockForAllocaInsert(parentOp, parentRegion);
size.getDefiningOp()->moveAfter(insertBlock, insertBlock->begin());
rewriter.setInsertionPointAfter(size.getDefiningOp());
}

// NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
// pointers! Only propagate pinned and bindc_name to help debugging, but
// this should have no functional purpose (and passing the operand segment
Expand Down
48 changes: 25 additions & 23 deletions flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,27 @@ mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genConstantOffset(
/// to the specific target may involve some sign-extending or truncation of
/// values, particularly to fit them from abstract box types to the
/// appropriate reified structures.
mlir::Value
ConvertFIRToLLVMPattern::integerCast(mlir::Location loc,
mlir::ConversionPatternRewriter &rewriter,
mlir::Type ty, mlir::Value val) const {
mlir::Value ConvertFIRToLLVMPattern::integerCast(
mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
mlir::Type ty, mlir::Value val, bool fold) const {
auto valTy = val.getType();
// If the value was not yet lowered, lower its type so that it can
// be used in getPrimitiveTypeSizeInBits.
if (!mlir::isa<mlir::IntegerType>(valTy))
valTy = convertType(valTy);
auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty);
auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy);
if (toSize < fromSize)
return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
if (toSize > fromSize)
return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
if (fold) {
if (toSize < fromSize)
return rewriter.createOrFold<mlir::LLVM::TruncOp>(loc, ty, val);
if (toSize > fromSize)
return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val);
} else {
if (toSize < fromSize)
return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
if (toSize > fromSize)
return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
}
return val;
}

Expand Down Expand Up @@ -274,16 +280,19 @@ mlir::Value ConvertFIRToLLVMPattern::computeBoxSize(
// Find the Block in which the alloca should be inserted.
// The order to recursively find the proper block:
// 1. An OpenMP Op that will be outlined.
// 2. A LLVMFuncOp
// 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp
mlir::Block *
ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const {
// 2. An OpenMP or OpenACC Op with one or more regions holding executable code.
// 3. A LLVMFuncOp
// 4. The first ancestor that is one of the above.
mlir::Block *ConvertFIRToLLVMPattern::getBlockForAllocaInsert(
mlir::Operation *op, mlir::Region *parentRegion) const {
if (auto iface = mlir::dyn_cast<mlir::omp::OutlineableOpenMPOpInterface>(op))
return iface.getAllocaBlock();
if (auto recipeIface = mlir::dyn_cast<mlir::accomp::RecipeInterface>(op))
return recipeIface.getAllocaBlock(*parentRegion);
if (auto llvmFuncOp = mlir::dyn_cast<mlir::LLVM::LLVMFuncOp>(op))
return &llvmFuncOp.front();

return getBlockForAllocaInsert(op->getParentOp());
return getBlockForAllocaInsert(op->getParentOp(), parentRegion);
}

// Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
Expand All @@ -297,16 +306,9 @@ mlir::Value ConvertFIRToLLVMPattern::genAllocaAndAddrCastWithType(
mlir::ConversionPatternRewriter &rewriter) const {
auto thisPt = rewriter.saveInsertionPoint();
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
if (mlir::isa<mlir::omp::DeclareReductionOp>(parentOp) ||
mlir::isa<mlir::omp::PrivateClauseOp>(parentOp)) {
// DeclareReductionOp & PrivateClauseOp have multiple child regions. We want
// to get the first block of whichever of those regions we are currently in
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
rewriter.setInsertionPointToStart(&parentRegion->front());
} else {
mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
rewriter.setInsertionPointToStart(insertBlock);
}
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp, parentRegion);
rewriter.setInsertionPointToStart(insertBlock);
auto size = genI32Constant(loc, rewriter, 1);
unsigned allocaAs = getAllocaAddressSpace(rewriter);
unsigned programAs = getProgramAddressSpace(rewriter);
Expand Down
12 changes: 6 additions & 6 deletions flang/test/Fir/alloc.fir
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func.func @allocmem_array_of_dynchar(%l: i32) -> !fir.heap<!fir.array<3x3x!fir.c

// CHECK-LABEL: define ptr @alloca_dynarray_of_nonchar(
// CHECK-SAME: i64 %[[extent:.*]])
// CHECK: %[[prod1:.*]] = mul i64 1, %[[extent]]
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 1
// CHECK: alloca [3 x i32], i64 %[[prod1]]
func.func @alloca_dynarray_of_nonchar(%e: index) -> !fir.ref<!fir.array<3x?xi32>> {
%1 = fir.alloca !fir.array<3x?xi32>, %e
Expand All @@ -165,7 +165,7 @@ func.func @alloca_dynarray_of_nonchar(%e: index) -> !fir.ref<!fir.array<3x?xi32>

// CHECK-LABEL: define ptr @alloca_dynarray_of_nonchar2(
// CHECK-SAME: i64 %[[extent:.*]])
// CHECK: %[[prod1:.*]] = mul i64 1, %[[extent]]
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 1
// CHECK: %[[prod2:.*]] = mul i64 %[[prod1]], %[[extent]]
// CHECK: alloca i32, i64 %[[prod2]]
func.func @alloca_dynarray_of_nonchar2(%e: index) -> !fir.ref<!fir.array<?x?xi32>> {
Expand Down Expand Up @@ -194,7 +194,7 @@ func.func @allocmem_dynarray_of_nonchar2(%e: index) -> !fir.heap<!fir.array<?x?x

// CHECK-LABEL: define ptr @alloca_dynarray_of_char(
// CHECK-SAME: i64 %[[extent:.*]])
// CHECK: %[[prod1:.*]] = mul i64 1, %[[extent]]
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 1
// CHECK: alloca [3 x [10 x i16]], i64 %[[prod1]]
func.func @alloca_dynarray_of_char(%e : index) -> !fir.ref<!fir.array<3x?x!fir.char<2,10>>> {
%1 = fir.alloca !fir.array<3x?x!fir.char<2,10>>, %e
Expand All @@ -203,7 +203,7 @@ func.func @alloca_dynarray_of_char(%e : index) -> !fir.ref<!fir.array<3x?x!fir.c

// CHECK-LABEL: define ptr @alloca_dynarray_of_char2(
// CHECK-SAME: i64 %[[extent:.*]])
// CHECK: %[[prod1:.*]] = mul i64 1, %[[extent]]
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 1
// CHECK: %[[prod2:.*]] = mul i64 %[[prod1]], %[[extent]]
// CHECK: alloca [10 x i16], i64 %[[prod2]]
func.func @alloca_dynarray_of_char2(%e : index) -> !fir.ref<!fir.array<?x?x!fir.char<2,10>>> {
Expand Down Expand Up @@ -334,10 +334,10 @@ func.func @allocmem_array_with_holes_dynchar(%arg0: index, %arg1: index) -> !fir
}

// CHECK-LABEL: define void @alloca_unlimited_polymorphic_box
// CHECK: %[[VAL_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1
// CHECK: %[[VAL_1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i64 1
// CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1
// CHECK: %[[VAL_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1
// CHECK: %[[VAL_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i64 1
// CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1

func.func @alloca_unlimited_polymorphic_box() {
%0 = fir.alloca !fir.class<none>
Expand Down
10 changes: 5 additions & 5 deletions flang/test/Fir/boxproc.fir
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// RUN: tco %s | FileCheck %s

// CHECK-LABEL: define void @_QPtest_proc_dummy()
// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK: %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8
// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[VAL_2:.*]] = getelementptr { ptr }, ptr %[[VAL_1]], i32 0, i32 0
// CHECK: store ptr %[[VAL_0]], ptr %[[VAL_2]], align 8
// CHECK: store i32 1, ptr %[[VAL_0]], align 4
// CHECK: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK: call void @llvm.init.trampoline(ptr %[[VAL_3]], ptr @_QFtest_proc_dummyPtest_proc_dummy_a, ptr %[[VAL_1]])
// CHECK: %[[VAL_6:.*]] = call ptr @llvm.adjust.trampoline(ptr %[[VAL_3]])
// CHECK: call void @_QPtest_proc_dummy_other(ptr %[[VAL_6]])
Expand Down Expand Up @@ -61,9 +61,10 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) {
}

// CHECK-LABEL: define void @_QPtest_proc_dummy_char()
// CHECK: %[[VAL_0:.*]] = alloca [40 x i8], i64 1, align 1
// CHECK: %[[VAL_1:.*]] = alloca [10 x i8], i64 1, align 1
// CHECK: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK: %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8
// CHECK: %[[VAL_1:.*]] = alloca [10 x i8], i64 1, align 1
// CHECK: %[[VAL_0:.*]] = alloca [40 x i8], i64 1, align 1
// CHECK: %[[VAL_3:.*]] = getelementptr { { ptr, i64 } }, ptr %[[VAL_2]], i32 0, i32 0
// CHECK: %[[VAL_5:.*]] = insertvalue { ptr, i64 } undef, ptr %[[VAL_1]], 0
// CHECK: %[[VAL_6:.*]] = insertvalue { ptr, i64 } %[[VAL_5]], i64 10, 1
Expand All @@ -75,7 +76,6 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) {
// CHECK: %[[VAL_15:.*]] = icmp sgt i64 %[[VAL_13]], 0
// CHECK: %[[VAL_18:.*]] = getelementptr [10 x [1 x i8]], ptr %[[VAL_1]], i32 0, i64 %[[VAL_11]]
// CHECK: store [1 x i8] c" ", ptr %[[VAL_18]], align 1
// CHECK: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK: call void @llvm.init.trampoline(ptr %[[VAL_20]], ptr @_QFtest_proc_dummy_charPgen_message, ptr %[[VAL_2]])
// CHECK: %[[VAL_23:.*]] = call ptr @llvm.adjust.trampoline(ptr %[[VAL_20]])
// CHECK: %[[VAL_25:.*]] = insertvalue { ptr, i64 } undef, ptr %[[VAL_23]], 0
Expand Down
Loading
Loading