Skip to content

[flang] introduce fir.copy to avoid load store of aggregates #130289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred
def fir_BoxAddressOrValueType : Type<IsBoxAddressOrValueTypePred,
"fir.box or fir.class type or reference">;

// Operand constraint used by ops that take the address of a constant size
// aggregate. The actual check is delegated to the C++ helper
// ::fir::isRefOfConstantSizeAggregateType; the string below is the text that
// shows up in the "operand #N must be ..." verifier diagnostic.
def RefOfConstantSizeAggregateTypePred
: CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">;
def AnyRefOfConstantSizeAggregateType : TypeConstraint<
RefOfConstantSizeAggregateTypePred,
"a reference type to a constant size fir.array, fir.char, or fir.type">;

//===----------------------------------------------------------------------===//
// Memory SSA operations
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -342,6 +348,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
}];
}

// fir.copy: copies a constant size aggregate from one memory location to
// another. It has no results and declares no traits or interfaces yet.
def fir_CopyOp : fir_Op<"copy", []> {
let summary = "copy constant size memory";

let description = [{
Copy the memory from a source with compile time constant size to
a destination of the same type.

This is meant to be used for aggregate types where load and store
are not appropriate to make a copy because LLVM is not meant to
handle load and store of "big" aggregates.
Comment on lines +358 to +360
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to add a verifier check or a predicate (AnyRefOfAggregateType) to ensure it is used only for aggregate types?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, although there is nothing that would technically prevent it to be used for intrinsic types, it seems saner to prevent it. I added a predicate.


Its "no_overlap" attribute allows indicating that the source and destination
are known to not overlap at compile time.

```
!t =!fir.type<t{x:!fir.array<1000xi32>}>
fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
```
TODO: add FirAliasTagOpInterface to carry TBAA.
}];

// Both operands use the same constant size aggregate reference constraint.
// The source is tagged with a MemRead effect and the destination with a
// MemWrite effect; no_overlap is an optional unit attribute.
let arguments = (ins Arg<AnyRefOfConstantSizeAggregateType, "", [MemRead]>:$source,
Arg<AnyRefOfConstantSizeAggregateType, "", [MemWrite]>:$destination,
OptionalAttr<UnitAttr>:$no_overlap);

// Convenience builder taking no_overlap as a plain bool (defaults to false).
let builders = [OpBuilder<(ins "mlir::Value":$source,
"mlir::Value":$destination,
CArg<"bool", "false">:$no_overlap)>];

// Textual form: `fir.copy %src to %dst [no_overlap] : type(src), type(dst)`.
let assemblyFormat = [{
$source `to` $destination (`no_overlap` $no_overlap^)?
attr-dict `:` type(operands)
}];

// A hand-written verifier is provided in C++ (fir::CopyOp::verify).
let hasVerifier = 1;
}


def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
let summary = [{
save an array, box, or record function result SSA-value to a memory location
Expand Down
7 changes: 7 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIRType.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) {
return t && mlir::isa<fir::BoxProcType>(t);
}

/// Return true when `t` is a pointer-like type (as recognized by
/// fir::dyn_cast_ptrEleTy) whose element type is a fir.char, fir.type, or
/// fir.array for which hasDynamicSize() is false.
inline bool isRefOfConstantSizeAggregateType(mlir::Type t) {
  mlir::Type eleTy = fir::dyn_cast_ptrEleTy(t);
  // Not a pointer-like type: nothing to inspect.
  if (!eleTy)
    return false;
  // Only character, derived type, and array element types qualify.
  if (!mlir::isa<fir::CharacterType, fir::RecordType, fir::SequenceType>(eleTy))
    return false;
  return !hasDynamicSize(eleTy);
}

/// Return a string representation of `ty`.
///
/// fir.array<10x10xf32> -> prefix_10x10xf32
Expand Down
40 changes: 35 additions & 5 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3539,6 +3539,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
}
};

/// Lower `fir.copy` to an LLVM memory transfer intrinsic: `llvm.memcpy` when
/// the op carries `no_overlap`, `llvm.memmove` otherwise. The byte count is
/// the stride of the converted element type, computed as an i64.
struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
  using FIROpConversion::FIROpConversion;

  llvm::LogicalResult
  matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
                  mlir::ConversionPatternRewriter &rewriter) const override {
    mlir::Location loc = copy.getLoc();

    // Already-converted operands.
    mlir::Value src = adaptor.getSource();
    mlir::Value dst = adaptor.getDestination();

    // Size of the copied object, taken from the FIR type behind the source
    // reference.
    mlir::Type sizeTy = mlir::IntegerType::get(rewriter.getContext(), 64);
    mlir::Type elementTy = fir::unwrapRefType(copy.getSource().getType());
    mlir::Value byteCount =
        genTypeStrideInBytes(loc, sizeTy, rewriter, convertType(elementTy));

    // memcpy requires non-overlapping buffers, so it is only used when the
    // op says so; memmove is the conservative fallback.
    auto emitTransfer = [&]() -> mlir::LLVM::AliasAnalysisOpInterface {
      if (copy.getNoOverlap())
        return rewriter.create<mlir::LLVM::MemcpyOp>(loc, dst, src, byteCount,
                                                     /*isVolatile=*/false);
      return rewriter.create<mlir::LLVM::MemmoveOp>(loc, dst, src, byteCount,
                                                    /*isVolatile=*/false);
    };
    mlir::LLVM::AliasAnalysisOpInterface newOp = emitTransfer();

    // TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
    attachTBAATag(newOp, elementTy, elementTy, nullptr);
    rewriter.eraseOp(copy);
    return mlir::success();
  }
};

namespace {

/// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
Expand Down Expand Up @@ -4142,11 +4172,11 @@ void fir::populateFIRToLLVMConversionPatterns(
BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
InsertOnRangeOpConversion, IsPresentOpConversion,
CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
Expand Down
20 changes: 20 additions & 0 deletions flang/lib/Optimizer/Dialect/FIROps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3940,6 +3940,26 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
build(builder, result, value, memref, {});
}

//===----------------------------------------------------------------------===//
// CopyOp
//===----------------------------------------------------------------------===//

/// Convenience builder: translate the boolean `noOverlap` flag into the
/// optional unit attribute (left null when false) and forward to the
/// attribute-based builder.
void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
                        mlir::Value source, mlir::Value destination,
                        bool noOverlap) {
  mlir::UnitAttr overlapMarker;
  if (noOverlap)
    overlapMarker = builder.getUnitAttr();
  build(builder, result, source, destination, overlapMarker);
}

/// Verify that, once the reference wrapper is removed, the source and the
/// destination designate the same value type.
llvm::LogicalResult fir::CopyOp::verify() {
  const bool sameValueType =
      fir::unwrapRefType(getSource().getType()) ==
      fir::unwrapRefType(getDestination().getType());
  if (sameValueType)
    return mlir::success();
  return emitOpError("source and destination must have the same value type");
}

//===----------------------------------------------------------------------===//
// StringLitOp
//===----------------------------------------------------------------------===//
Expand Down
35 changes: 35 additions & 0 deletions flang/test/Fir/copy-codegen.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Test fir.copy codegen.
// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s

!t=!fir.type<sometype{i:!fir.array<9xi32>}>

module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {

// fir.copy with the no_overlap attribute; the CHECK lines that follow expect
// it to be lowered to "llvm.intr.memcpy".
func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}
// CHECK-LABEL: llvm.func @test_copy_1(
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
// CHECK: llvm.return
// CHECK: }

// fir.copy without no_overlap; the CHECK lines that follow expect it to be
// lowered to "llvm.intr.memmove".
func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
return
}
// CHECK-LABEL: llvm.func @test_copy_2(
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
// CHECK: llvm.return
// CHECK: }
}
9 changes: 9 additions & 0 deletions flang/test/Fir/fir-ops.fir
Original file line number Diff line number Diff line change
Expand Up @@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
%0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
return %0 : i16
}

// CHECK-LABEL: @test_copy(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
// fir.copy from a !fir.ref source to a !fir.ptr destination of the same
// record type; the CHECK line expects the op to be printed back with the
// same operands, attribute, and types.
func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
// CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
return
}
37 changes: 37 additions & 0 deletions flang/test/Fir/invalid.fir
Original file line number Diff line number Diff line change
Expand Up @@ -1018,3 +1018,40 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
%1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
return
}

// -----

!t=!fir.type<sometype{i:i32}>
!t2=!fir.type<sometype2{j:i32}>
// Negative test: source and destination reference different record types
// (sometype vs sometype2), which the op verifier is expected to reject.
func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
// expected-error@+1{{'fir.copy' op source and destination must have the same value type}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
return
}

// -----

!t=!fir.type<sometype{i:i32}>
// Negative test: the source operand is a bare record value (!t) rather than
// a reference to one, so the operand type constraint is expected to fail.
func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type<sometype{i:i32}>'}}
fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
return
}

// -----

!t=!fir.array<?xi32>
// Negative test: the referenced array has an unknown extent
// (!fir.array<?xi32>), so the constant size requirement is expected to fail.
func.func @bad_copy_3(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<!fir.array<?xi32>>'}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}

// -----

!t=f32
// Negative test: the referenced type is f32, which is not a fir.array,
// fir.char, or fir.type, so the operand type constraint is expected to fail.
func.func @bad_copy_4(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<f32>'}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}
Loading