Skip to content

Commit 8a56631

Browse files
DominikAdamski authored and ronlieb committed
[Flang][MemCpy] Restore memcpy upstream patch
It restores the upstream commit 0c9a023 (Author: Asher Mancinelli <[email protected]>; Date: Wed Oct 30 09:50:27 2024 -0700): "[flang][fir] always use memcpy for fir.box" (llvm#113949).
1 parent 544233f commit 8a56631

10 files changed

+167
-193
lines changed

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 19 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -3269,9 +3269,10 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
32693269
llvm::LogicalResult
32703270
matchAndRewrite(fir::LoadOp load, OpAdaptor adaptor,
32713271
mlir::ConversionPatternRewriter &rewriter) const override {
3272+
32723273
mlir::Type llvmLoadTy = convertObjectType(load.getType());
32733274
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(load.getType())) {
3274-
// fir.box is a special case because it is considered as an ssa values in
3275+
// fir.box is a special case because it is considered an ssa value in
32753276
// fir, but it is lowered as a pointer to a descriptor. So
32763277
// fir.ref<fir.box> and fir.box end up being the same llvm types and
32773278
// loading a fir.ref<fir.box> is implemented as taking a snapshot of the
@@ -3295,30 +3296,16 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
32953296
newBoxStorage = genAllocaAndAddrCastWithType(loc, llvmLoadTy,
32963297
defaultAlign, rewriter);
32973298

3298-
// TODO: always generate llvm.memcpy, LLVM is better at optimizing it than
3299-
// aggregate loads + stores.
3300-
if (boxTy.isAssumedRank()) {
3301-
3302-
TypePair boxTypePair{boxTy, llvmLoadTy};
3303-
mlir::Value boxSize =
3304-
computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
3305-
auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
3306-
loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
3307-
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
3308-
memcpy.setTBAATags(*optionalTag);
3309-
else
3310-
attachTBAATag(memcpy, boxTy, boxTy, nullptr);
3311-
} else {
3312-
auto boxValue = rewriter.create<mlir::LLVM::LoadOp>(loc, llvmLoadTy,
3313-
inputBoxStorage);
3314-
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
3315-
boxValue.setTBAATags(*optionalTag);
3316-
else
3317-
attachTBAATag(boxValue, boxTy, boxTy, nullptr);
3318-
auto storeOp =
3319-
rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
3320-
attachTBAATag(storeOp, boxTy, boxTy, nullptr);
3321-
}
3299+
TypePair boxTypePair{boxTy, llvmLoadTy};
3300+
mlir::Value boxSize =
3301+
computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
3302+
auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
3303+
loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
3304+
3305+
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
3306+
memcpy.setTBAATags(*optionalTag);
3307+
else
3308+
attachTBAATag(memcpy, boxTy, boxTy, nullptr);
33223309
rewriter.replaceOp(load, newBoxStorage);
33233310
} else {
33243311
auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
@@ -3602,20 +3589,13 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
36023589
mlir::LLVM::AliasAnalysisOpInterface newOp;
36033590
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(storeTy)) {
36043591
mlir::Type llvmBoxTy = lowerTy().convertBoxTypeAsStruct(boxTy);
3605-
// fir.box value is actually in memory, load it first before storing it,
3606-
// or do a memcopy for assumed-rank descriptors.
3607-
if (boxTy.isAssumedRank()) {
3608-
TypePair boxTypePair{boxTy, llvmBoxTy};
3609-
mlir::Value boxSize =
3610-
computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
3611-
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3612-
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
3613-
} else {
3614-
auto val =
3615-
rewriter.create<mlir::LLVM::LoadOp>(loc, llvmBoxTy, llvmValue);
3616-
attachTBAATag(val, boxTy, boxTy, nullptr);
3617-
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, val, llvmMemref);
3618-
}
3592+
// Always use memcpy because LLVM is not as effective at optimizing
3593+
// aggregate loads/stores as it is optimizing memcpy.
3594+
TypePair boxTypePair{boxTy, llvmBoxTy};
3595+
mlir::Value boxSize =
3596+
computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
3597+
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3598+
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
36193599
} else {
36203600
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
36213601
}

flang/test/Fir/box.fir

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
11
// RUN: tco -o - %s | FileCheck %s
2-
// memcpy related
3-
// XFAIL: *
42

53
// Global box initialization (test must come first because llvm globals are emitted first).
64
// CHECK-LABEL: @globalx = internal global { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 4, i32 20240719, i8 0, i8 9, i8 2, i8 0 }

flang/test/Fir/convert-to-llvm-openmp-and-fir.fir

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -799,8 +799,8 @@ func.func @_QPs(%arg0: !fir.ref<complex<f32>> {fir.bindc_name = "x"}) {
799799
//CHECK: omp.parallel {
800800
//CHECK: %[[CONST_1:.*]] = llvm.mlir.constant(1 : i32) : i32
801801
//CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %[[CONST_1:.*]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
802-
//CHECK: %[[LOAD:.*]] = llvm.load %[[ALLOCA]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
803-
//CHECK: llvm.store %[[LOAD]], %[[ALLOCA_1]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
802+
//CHECK: %[[SIZE:.*]] = llvm.mlir.constant(24 : i32) : i32
803+
//CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[ALLOCA]], %[[SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
804804
//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA_1]][0, 0] : (!llvm.ptr) -> !llvm.ptr
805805
//CHECK: %[[LOAD_2:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> !llvm.ptr
806806
//CHECK: omp.terminator
@@ -1017,7 +1017,7 @@ func.func @omp_map_common_block_using_common_block_symbol() {
10171017
%8 = fir.load %4 : !fir.ref<i32>
10181018
%9 = arith.addi %8, %c20_i32 : i32
10191019
fir.store %9 to %7 : !fir.ref<i32>
1020-
omp.terminator
1020+
omp.terminator
10211021
}
10221022
return
10231023
}
@@ -1054,7 +1054,7 @@ func.func @omp_map_common_block_using_common_block_members() {
10541054
%9 = fir.load %arg0 : !fir.ref<i32>
10551055
%10 = arith.muli %9, %c10_i32 : i32
10561056
fir.store %10 to %arg1 : !fir.ref<i32>
1057-
omp.terminator
1057+
omp.terminator
10581058
}
10591059
return
10601060
}
@@ -1155,8 +1155,7 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_
11551155
%c0 = arith.constant 0 : index
11561156
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true}
11571157
%0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c4 : index) extent(%c4 : index) stride(%c1 : index) start_idx(%c0 : index) {stride_in_bytes = true}
1158-
// CHECK: %[[LOAD_9:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1159-
// CHECK: llvm.store %[[LOAD_9]], %[[DTYPE_ALLOCATABLE_ALOCA]] : [[DESC_TY]], !llvm.ptr
1158+
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
11601159
%1 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>>>>
11611160
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
11621161
// CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
@@ -1168,8 +1167,7 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_
11681167
%4 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%3 : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%0) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
11691168
// CHECK: %[[MAP_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[GEP_DTYPE_MEMBER]] : !llvm.ptr, [[DESC_TY2]]) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
11701169
%5 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
1171-
// CHECK: %[[LOAD_16:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1172-
// CHECK: llvm.store %[[LOAD_16]], %[[DTYPE_ALLOCATABLE_ALOCA_2]] : [[DESC_TY]], !llvm.ptr
1170+
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA_2]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
11731171
%6 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>>>>
11741172
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA_2]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
11751173
// CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
@@ -1205,8 +1203,7 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.
12051203
%c0 = arith.constant 0 : index
12061204
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true}
12071205
%0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c4 : index) extent(%c4 : index) stride(%c1 : index) start_idx(%c0 : index) {stride_in_bytes = true}
1208-
// CHECK: %[[LOAD_11:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1209-
// CHECK: llvm.store %[[LOAD_11]], %[[DTYPE_ALLOCATABLE_ALOCA]] : [[DESC_TY]], !llvm.ptr
1206+
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
12101207
%1 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>}>>>>
12111208
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
12121209
// CHECK: %[[LOAD_GEP_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
@@ -1220,8 +1217,7 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.
12201217
%5 = omp.map.info var_ptr(%3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%4 : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%0) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
12211218
// CHECK: %[[MAP_NESTED_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER]] : !llvm.ptr, [[DESC_TY2]]) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
12221219
%6 = omp.map.info var_ptr(%3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
1223-
// CHECK: %[[LOAD_19:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1224-
// CHECK: llvm.store %[[LOAD_19]], %[[DTYPE_ALLOCATABLE_ALOCA_2]] : [[DESC_TY]], !llvm.ptr
1220+
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA_2]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
12251221
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA_2]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
12261222
// CHECK: %[[LOAD_GEP_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
12271223
%7 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>}>>>>

flang/test/Fir/convert-to-llvm.fir

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -870,8 +870,8 @@ func.func @test_store_box(%array : !fir.ref<!fir.box<!fir.array<?x?xf32>>>, %box
870870
// CHECK-LABEL: llvm.func @test_store_box
871871
// CHECK-SAME: (%[[arg0:.*]]: !llvm.ptr,
872872
// CHECK-SAME: %[[arg1:.*]]: !llvm.ptr) {
873-
// CHECK-NEXT: %[[box_to_store:.*]] = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>
874-
// CHECK-NEXT: llvm.store %[[box_to_store]], %[[arg0]] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>, !llvm.ptr
873+
// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(72 : i32) : i32
874+
// CHECK-NEXT: "llvm.intr.memcpy"(%[[arg0]], %[[arg1]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
875875
// CHECK-NEXT: llvm.return
876876
// CHECK-NEXT: }
877877

@@ -883,15 +883,17 @@ func.func @store_unlimited_polymorphic_box(%arg0 : !fir.class<none>, %arg1 : !fi
883883
fir.store %arg3 to %arg3r : !fir.ref<!fir.box<!fir.array<?xnone>>>
884884
return
885885
}
886-
// CHECK-LABEL: llvm.func @store_unlimited_polymorphic_box(
887-
// CHECK: %[[VAL_8:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
888-
// CHECK: llvm.store %[[VAL_8]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
889-
// CHECK: %[[VAL_9:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>
890-
// CHECK: llvm.store %[[VAL_9]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
891-
// CHECK: %[[VAL_10:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
892-
// CHECK: llvm.store %[[VAL_10]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
893-
// CHECK: %[[VAL_11:.*]] = llvm.load %{{.*}}: !llvm.ptr
894-
// CHECK: llvm.store %[[VAL_11]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
886+
// CHECK: llvm.func @store_unlimited_polymorphic_box(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: !llvm.ptr, %[[VAL_2:.*]]: !llvm.ptr, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: !llvm.ptr, %[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr) {
887+
// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(40 : i32) : i32
888+
// CHECK: "llvm.intr.memcpy"(%[[VAL_4]], %[[VAL_0]], %[[VAL_8]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
889+
// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(64 : i32) : i32
890+
// CHECK: "llvm.intr.memcpy"(%[[VAL_5]], %[[VAL_1]], %[[VAL_9]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
891+
// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(40 : i32) : i32
892+
// CHECK: "llvm.intr.memcpy"(%[[VAL_6]], %[[VAL_2]], %[[VAL_10]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
893+
// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32
894+
// CHECK: "llvm.intr.memcpy"(%[[VAL_7]], %[[VAL_3]], %[[VAL_11]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
895+
// CHECK: llvm.return
896+
// CHECK: }
895897

896898

897899
// -----
@@ -943,8 +945,8 @@ func.func @test_load_box(%addr : !fir.ref<!fir.box<!fir.array<10xf32>>>) {
943945
// GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>
944946
// AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>{{.*}} : (i32) -> !llvm.ptr<5>
945947
// AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<5> to !llvm.ptr
946-
// CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr -> !llvm.struct<([[DESC_TYPE]])>
947-
// CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.struct<([[DESC_TYPE]])>, !llvm.ptr
948+
// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(48 : i32) : i32
949+
// CHECK-NEXT: "llvm.intr.memcpy"(%[[box_copy]], %[[arg0]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
948950
// CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr) -> ()
949951
// CHECK-NEXT: llvm.return
950952
// CHECK-NEXT: }

0 commit comments

Comments
 (0)