Skip to content

Commit da2d0ed

Browse files
authored
Merge pull request llvm#570 from AMD-Lightning-Internal/amd/dev/macurtis/revert-0c9a02355abc
Revert "[flang][fir] always use memcpy for fir.box (llvm#113949)"
2 parents e006345 + 30296ff commit da2d0ed

File tree

11 files changed

+217
-191
lines changed

11 files changed

+217
-191
lines changed

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3197,10 +3197,9 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
31973197
llvm::LogicalResult
31983198
matchAndRewrite(fir::LoadOp load, OpAdaptor adaptor,
31993199
mlir::ConversionPatternRewriter &rewriter) const override {
3200-
32013200
mlir::Type llvmLoadTy = convertObjectType(load.getType());
32023201
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(load.getType())) {
3203-
// fir.box is a special case because it is considered an ssa value in
3202+
// fir.box is a special case because it is considered an ssa value in
32043203
// fir, but it is lowered as a pointer to a descriptor. So
32053204
// fir.ref<fir.box> and fir.box end up being the same llvm types and
32063205
// loading a fir.ref<fir.box> is implemented as taking a snapshot of the
@@ -3224,16 +3223,30 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
32243223
newBoxStorage = genAllocaAndAddrCastWithType(loc, llvmLoadTy,
32253224
defaultAlign, rewriter);
32263225

3227-
TypePair boxTypePair{boxTy, llvmLoadTy};
3228-
mlir::Value boxSize =
3229-
computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
3230-
auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
3231-
loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
3232-
3233-
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
3234-
memcpy.setTBAATags(*optionalTag);
3235-
else
3236-
attachTBAATag(memcpy, boxTy, boxTy, nullptr);
3226+
// TODO: always generate llvm.memcpy, LLVM is better at optimizing it than
3227+
// aggregate loads + stores.
3228+
if (boxTy.isAssumedRank()) {
3229+
3230+
TypePair boxTypePair{boxTy, llvmLoadTy};
3231+
mlir::Value boxSize =
3232+
computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
3233+
auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
3234+
loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
3235+
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
3236+
memcpy.setTBAATags(*optionalTag);
3237+
else
3238+
attachTBAATag(memcpy, boxTy, boxTy, nullptr);
3239+
} else {
3240+
auto boxValue = rewriter.create<mlir::LLVM::LoadOp>(loc, llvmLoadTy,
3241+
inputBoxStorage);
3242+
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
3243+
boxValue.setTBAATags(*optionalTag);
3244+
else
3245+
attachTBAATag(boxValue, boxTy, boxTy, nullptr);
3246+
auto storeOp =
3247+
rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
3248+
attachTBAATag(storeOp, boxTy, boxTy, nullptr);
3249+
}
32373250
rewriter.replaceOp(load, newBoxStorage);
32383251
} else {
32393252
auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
@@ -3517,13 +3530,20 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
35173530
mlir::LLVM::AliasAnalysisOpInterface newOp;
35183531
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(storeTy)) {
35193532
mlir::Type llvmBoxTy = lowerTy().convertBoxTypeAsStruct(boxTy);
3520-
// Always use memcpy because LLVM is not as effective at optimizing
3521-
// aggregate loads/stores as it is optimizing memcpy.
3522-
TypePair boxTypePair{boxTy, llvmBoxTy};
3523-
mlir::Value boxSize =
3524-
computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
3525-
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3526-
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
3533+
// fir.box value is actually in memory, load it first before storing it,
3534+
// or do a memcpy for assumed-rank descriptors.
3535+
if (boxTy.isAssumedRank()) {
3536+
TypePair boxTypePair{boxTy, llvmBoxTy};
3537+
mlir::Value boxSize =
3538+
computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
3539+
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3540+
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
3541+
} else {
3542+
auto val =
3543+
rewriter.create<mlir::LLVM::LoadOp>(loc, llvmBoxTy, llvmValue);
3544+
attachTBAATag(val, boxTy, boxTy, nullptr);
3545+
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, val, llvmMemref);
3546+
}
35273547
} else {
35283548
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
35293549
}

flang/test/Fir/box.fir

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,12 @@ func.func @fa(%a : !fir.ref<!fir.array<100xf32>>) {
5656
// CHECK-LABEL: define void @b1(
5757
// CHECK-SAME: ptr captures(none) %[[res:.*]], ptr captures(none) %[[arg0:.*]], i64 %[[arg1:.*]])
5858
func.func @b1(%arg0 : !fir.ref<!fir.char<1,?>>, %arg1 : index) -> !fir.box<!fir.char<1,?>> {
59-
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
6059
// CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]]
6160
// CHECK: insertvalue {{.*}} undef, i64 %[[size]], 1
6261
// CHECK: insertvalue {{.*}} i32 20240719, 2
6362
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
6463
%x = fir.embox %arg0 typeparams %arg1 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.box<!fir.char<1,?>>
65-
// CHECK: store {{.*}}, ptr %[[alloca]]
66-
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 24, i1 false)
64+
// CHECK: store {{.*}}, ptr %[[res]]
6765
return %x : !fir.box<!fir.char<1,?>>
6866
}
6967

@@ -73,13 +71,11 @@ func.func @b1(%arg0 : !fir.ref<!fir.char<1,?>>, %arg1 : index) -> !fir.box<!fir.
7371
// CHECK-SAME: ptr captures(none) %[[arg0:.*]], i64 %[[arg1:.*]])
7472
func.func @b2(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,5>>>, %arg1 : index) -> !fir.box<!fir.array<?x!fir.char<1,5>>> {
7573
%1 = fir.shape %arg1 : (index) -> !fir.shape<1>
76-
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
7774
// CHECK: insertvalue {{.*}} { ptr undef, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), i32 20240719, i8 1, i8 40, i8 0, i8 0, {{.*}} }, i64 %[[arg1]], 7, 0, 1
7875
// CHECK: insertvalue {{.*}} %{{.*}}, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), 7, 0, 2
7976
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
8077
%2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<?x!fir.char<1,5>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,5>>>
81-
// CHECK: store {{.*}}, ptr %[[alloca]]
82-
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false)
78+
// CHECK: store {{.*}}, ptr %[[res]]
8379
return %2 : !fir.box<!fir.array<?x!fir.char<1,5>>>
8480
}
8581

@@ -88,16 +84,14 @@ func.func @b2(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,5>>>, %arg1 : index) ->
8884
// CHECK-SAME: ptr captures(none) %[[res:.*]], ptr captures(none) %[[arg0:.*]], i64 %[[arg1:.*]], i64 %[[arg2:.*]])
8985
func.func @b3(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,?>>>, %arg1 : index, %arg2 : index) -> !fir.box<!fir.array<?x!fir.char<1,?>>> {
9086
%1 = fir.shape %arg2 : (index) -> !fir.shape<1>
91-
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
9287
// CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]]
9388
// CHECK: insertvalue {{.*}} i64 %[[size]], 1
9489
// CHECK: insertvalue {{.*}} i32 20240719, 2
9590
// CHECK: insertvalue {{.*}} i64 %[[arg2]], 7, 0, 1
9691
// CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2
9792
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
9893
%2 = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
99-
// CHECK: store {{.*}}, ptr %[[alloca]]
100-
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false)
94+
// CHECK: store {{.*}}, ptr %[[res]]
10195
return %2 : !fir.box<!fir.array<?x!fir.char<1,?>>>
10296
}
10397

@@ -107,16 +101,14 @@ func.func @b3(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,?>>>, %arg1 : index, %ar
107101
func.func @b4(%arg0 : !fir.ref<!fir.array<7x!fir.char<1,?>>>, %arg1 : index) -> !fir.box<!fir.array<7x!fir.char<1,?>>> {
108102
%c_7 = arith.constant 7 : index
109103
%1 = fir.shape %c_7 : (index) -> !fir.shape<1>
110-
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
111104
// CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]]
112105
// CHECK: insertvalue {{.*}} i64 %[[size]], 1
113106
// CHECK: insertvalue {{.*}} i32 20240719, 2
114107
// CHECK: insertvalue {{.*}} i64 7, 7, 0, 1
115108
// CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2
116109
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
117110
%x = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref<!fir.array<7x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<7x!fir.char<1,?>>>
118-
// CHECK: store {{.*}}, ptr %[[alloca]]
119-
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false)
111+
// CHECK: store {{.*}}, ptr %[[res]]
120112
return %x : !fir.box<!fir.array<7x!fir.char<1,?>>>
121113
}
122114

@@ -125,7 +117,8 @@ func.func @b4(%arg0 : !fir.ref<!fir.array<7x!fir.char<1,?>>>, %arg1 : index) ->
125117
// CHECK-SAME: ptr captures(none) %[[arg0:.*]], ptr %[[arg1:.*]])
126118
func.func @b5(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>, %arg1 : !fir.box<!fir.heap<!fir.array<?x?xf32>>>) {
127119
fir.store %arg1 to %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
128-
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %0, ptr %1, i32 72, i1 false)
120+
// CHECK: %[[boxLoad:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] }, ptr %[[arg1]]
121+
// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] } %[[boxLoad]], ptr %[[arg0]]
129122
return
130123
}
131124

flang/test/Fir/convert-to-llvm-openmp-and-fir.fir

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -799,8 +799,8 @@ func.func @_QPs(%arg0: !fir.ref<complex<f32>> {fir.bindc_name = "x"}) {
799799
//CHECK: omp.parallel {
800800
//CHECK: %[[CONST_1:.*]] = llvm.mlir.constant(1 : i32) : i32
801801
//CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %[[CONST_1:.*]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
802-
//CHECK: %[[SIZE:.*]] = llvm.mlir.constant(24 : i32) : i32
803-
//CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[ALLOCA]], %[[SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
802+
//CHECK: %[[LOAD:.*]] = llvm.load %[[ALLOCA]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
803+
//CHECK: llvm.store %[[LOAD]], %[[ALLOCA_1]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
804804
//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA_1]][0, 0] : (!llvm.ptr) -> !llvm.ptr
805805
//CHECK: %[[LOAD_2:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> !llvm.ptr
806806
//CHECK: omp.terminator
@@ -1160,7 +1160,8 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_
11601160
%c0 = arith.constant 0 : index
11611161
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true}
11621162
%0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c4 : index) extent(%c4 : index) stride(%c1 : index) start_idx(%c0 : index) {stride_in_bytes = true}
1163-
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
1163+
// CHECK: %[[LOAD_9:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1164+
// CHECK: llvm.store %[[LOAD_9]], %[[DTYPE_ALLOCATABLE_ALOCA]] : [[DESC_TY]], !llvm.ptr
11641165
%1 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>>>>
11651166
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
11661167
// CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
@@ -1172,7 +1173,8 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_
11721173
%4 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%3 : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%0) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
11731174
// CHECK: %[[MAP_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[GEP_DTYPE_MEMBER]] : !llvm.ptr, [[DESC_TY2]]) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
11741175
%5 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
1175-
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA_2]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
1176+
// CHECK: %[[LOAD_16:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1177+
// CHECK: llvm.store %[[LOAD_16]], %[[DTYPE_ALLOCATABLE_ALOCA_2]] : [[DESC_TY]], !llvm.ptr
11761178
%6 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>>>>
11771179
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA_2]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
11781180
// CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
@@ -1208,7 +1210,8 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.
12081210
%c0 = arith.constant 0 : index
12091211
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true}
12101212
%0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c4 : index) extent(%c4 : index) stride(%c1 : index) start_idx(%c0 : index) {stride_in_bytes = true}
1211-
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
1213+
// CHECK: %[[LOAD_11:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1214+
// CHECK: llvm.store %[[LOAD_11]], %[[DTYPE_ALLOCATABLE_ALOCA]] : [[DESC_TY]], !llvm.ptr
12121215
%1 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>}>>>>
12131216
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
12141217
// CHECK: %[[LOAD_GEP_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
@@ -1222,7 +1225,8 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.
12221225
%5 = omp.map.info var_ptr(%3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%4 : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%0) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
12231226
// CHECK: %[[MAP_NESTED_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER]] : !llvm.ptr, [[DESC_TY2]]) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
12241227
%6 = omp.map.info var_ptr(%3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
1225-
// CHECK: "llvm.intr.memcpy"(%[[DTYPE_ALLOCATABLE_ALOCA_2]], %[[ARG_0]], {{.*}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
1228+
// CHECK: %[[LOAD_19:.*]] = llvm.load %[[ARG_0]] : !llvm.ptr -> [[DESC_TY]]
1229+
// CHECK: llvm.store %[[LOAD_19]], %[[DTYPE_ALLOCATABLE_ALOCA_2]] : [[DESC_TY]], !llvm.ptr
12261230
// CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[DTYPE_ALLOCATABLE_ALOCA_2]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]]
12271231
// CHECK: %[[LOAD_GEP_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr
12281232
%7 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QFRecTy{i:f32,scalar:!fir.box<!fir.heap<i32>>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box<!fir.heap<!fir.array<?xi32>>>,k:i32}>}>>>>

flang/test/Fir/convert-to-llvm.fir

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -862,8 +862,8 @@ func.func @test_store_box(%array : !fir.ref<!fir.box<!fir.array<?x?xf32>>>, %box
862862
// CHECK-LABEL: llvm.func @test_store_box
863863
// CHECK-SAME: (%[[arg0:.*]]: !llvm.ptr,
864864
// CHECK-SAME: %[[arg1:.*]]: !llvm.ptr) {
865-
// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(72 : i32) : i32
866-
// CHECK-NEXT: "llvm.intr.memcpy"(%[[arg0]], %[[arg1]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
865+
// CHECK-NEXT: %[[box_to_store:.*]] = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>
866+
// CHECK-NEXT: llvm.store %[[box_to_store]], %[[arg0]] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>, !llvm.ptr
867867
// CHECK-NEXT: llvm.return
868868
// CHECK-NEXT: }
869869

@@ -875,17 +875,15 @@ func.func @store_unlimited_polymorphic_box(%arg0 : !fir.class<none>, %arg1 : !fi
875875
fir.store %arg3 to %arg3r : !fir.ref<!fir.box<!fir.array<?xnone>>>
876876
return
877877
}
878-
// CHECK: llvm.func @store_unlimited_polymorphic_box(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: !llvm.ptr, %[[VAL_2:.*]]: !llvm.ptr, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: !llvm.ptr, %[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr) {
879-
// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(40 : i32) : i32
880-
// CHECK: "llvm.intr.memcpy"(%[[VAL_4]], %[[VAL_0]], %[[VAL_8]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
881-
// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(64 : i32) : i32
882-
// CHECK: "llvm.intr.memcpy"(%[[VAL_5]], %[[VAL_1]], %[[VAL_9]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
883-
// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(40 : i32) : i32
884-
// CHECK: "llvm.intr.memcpy"(%[[VAL_6]], %[[VAL_2]], %[[VAL_10]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
885-
// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32
886-
// CHECK: "llvm.intr.memcpy"(%[[VAL_7]], %[[VAL_3]], %[[VAL_11]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
887-
// CHECK: llvm.return
888-
// CHECK: }
878+
// CHECK-LABEL: llvm.func @store_unlimited_polymorphic_box(
879+
// CHECK: %[[VAL_8:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
880+
// CHECK: llvm.store %[[VAL_8]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
881+
// CHECK: %[[VAL_9:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>
882+
// CHECK: llvm.store %[[VAL_9]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
883+
// CHECK: %[[VAL_10:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
884+
// CHECK: llvm.store %[[VAL_10]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
885+
// CHECK: %[[VAL_11:.*]] = llvm.load %{{.*}}: !llvm.ptr
886+
// CHECK: llvm.store %[[VAL_11]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
889887

890888

891889
// -----
@@ -937,8 +935,8 @@ func.func @test_load_box(%addr : !fir.ref<!fir.box<!fir.array<10xf32>>>) {
937935
// GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>
938936
// AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>{{.*}} : (i32) -> !llvm.ptr<5>
939937
// AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<5> to !llvm.ptr
940-
// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(48 : i32) : i32
941-
// CHECK-NEXT: "llvm.intr.memcpy"(%[[box_copy]], %[[arg0]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
938+
// CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr -> !llvm.struct<([[DESC_TYPE]])>
939+
// CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.struct<([[DESC_TYPE]])>, !llvm.ptr
942940
// CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr) -> ()
943941
// CHECK-NEXT: llvm.return
944942
// CHECK-NEXT: }

0 commit comments

Comments
 (0)