Skip to content

Commit 63897a5

Browse files
authored
[MLIR][SROA] Replace pattern based approach with a one-shot one (#85437)
This commit changes MLIR's SROA implementation back from being pattern based into a full pass. This is beneficial for upcoming changes that rely more heavily on the datalayout. Unfortunately, this change required substantial test changes, as the IRBuilder no longer cleans up the IR.
1 parent 3e2992f commit 63897a5

File tree

3 files changed

+86
-80
lines changed

3 files changed

+86
-80
lines changed

mlir/include/mlir/Transforms/SROA.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99
#ifndef MLIR_TRANSFORMS_SROA_H
1010
#define MLIR_TRANSFORMS_SROA_H
1111

12-
#include "mlir/IR/PatternMatch.h"
1312
#include "mlir/Interfaces/MemorySlotInterfaces.h"
1413
#include "mlir/Support/LogicalResult.h"
1514
#include "llvm/ADT/Statistic.h"
16-
#include <variant>
1715

1816
namespace mlir {
1917

@@ -29,24 +27,6 @@ struct SROAStatistics {
2927
llvm::Statistic *maxSubelementAmount = nullptr;
3028
};
3129

32-
/// Pattern applying SROA to the regions of the operations on which it
33-
/// matches.
34-
class SROAPattern
35-
: public OpInterfaceRewritePattern<DestructurableAllocationOpInterface> {
36-
public:
37-
using OpInterfaceRewritePattern::OpInterfaceRewritePattern;
38-
39-
SROAPattern(MLIRContext *context, SROAStatistics statistics = {},
40-
PatternBenefit benefit = 1)
41-
: OpInterfaceRewritePattern(context, benefit), statistics(statistics) {}
42-
43-
LogicalResult matchAndRewrite(DestructurableAllocationOpInterface allocator,
44-
PatternRewriter &rewriter) const override;
45-
46-
private:
47-
SROAStatistics statistics;
48-
};
49-
5030
/// Attempts to destructure the slots of destructurable allocators. Returns
5131
/// failure if no slot was destructured.
5232
LogicalResult tryToDestructureMemorySlots(

mlir/lib/Transforms/SROA.cpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#include "mlir/Transforms/SROA.h"
1010
#include "mlir/Analysis/SliceAnalysis.h"
1111
#include "mlir/Interfaces/MemorySlotInterfaces.h"
12-
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
1312
#include "mlir/Transforms/Passes.h"
1413

1514
namespace mlir {
@@ -205,13 +204,6 @@ LogicalResult mlir::tryToDestructureMemorySlots(
205204
return success(destructuredAny);
206205
}
207206

208-
LogicalResult
209-
SROAPattern::matchAndRewrite(DestructurableAllocationOpInterface allocator,
210-
PatternRewriter &rewriter) const {
211-
hasBoundedRewriteRecursion();
212-
return tryToDestructureMemorySlots({allocator}, rewriter, statistics);
213-
}
214-
215207
namespace {
216208

217209
struct SROA : public impl::SROABase<SROA> {
@@ -223,12 +215,35 @@ struct SROA : public impl::SROABase<SROA> {
223215
SROAStatistics statistics{&destructuredAmount, &slotsWithMemoryBenefit,
224216
&maxSubelementAmount};
225217

226-
RewritePatternSet rewritePatterns(&getContext());
227-
rewritePatterns.add<SROAPattern>(&getContext(), statistics);
228-
FrozenRewritePatternSet frozen(std::move(rewritePatterns));
218+
bool changed = false;
219+
220+
for (Region &region : scopeOp->getRegions()) {
221+
if (region.getBlocks().empty())
222+
continue;
229223

230-
if (failed(applyPatternsAndFoldGreedily(scopeOp, frozen)))
231-
signalPassFailure();
224+
OpBuilder builder(&region.front(), region.front().begin());
225+
IRRewriter rewriter(builder);
226+
227+
// Destructuring a slot can allow for further destructuring of other
228+
// slots, destructuring is tried until no destructuring succeeds.
229+
while (true) {
230+
SmallVector<DestructurableAllocationOpInterface> allocators;
231+
// Build a list of allocators to attempt to destructure the slots of.
232+
// TODO: Update list on the fly to avoid repeated visiting of the same
233+
// allocators.
234+
region.walk([&](DestructurableAllocationOpInterface allocator) {
235+
allocators.emplace_back(allocator);
236+
});
237+
238+
if (failed(
239+
tryToDestructureMemorySlots(allocators, rewriter, statistics)))
240+
break;
241+
242+
changed = true;
243+
}
244+
}
245+
if (!changed)
246+
markAllAnalysesPreserved();
232247
}
233248
};
234249

mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir

Lines changed: 58 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -146,21 +146,22 @@ llvm.func @invalid_indirect_memset() -> i32 {
146146

147147
// CHECK-LABEL: llvm.func @memset_double_use
148148
llvm.func @memset_double_use() -> i32 {
149-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
150-
// CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
151-
// CHECK-DAG: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
152-
// CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
153-
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
154-
// CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
149+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
150+
// CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
151+
// CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
152+
// CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
155153
%0 = llvm.mlir.constant(1 : i32) : i32
156154
%1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
157155
%memset_value = llvm.mlir.constant(42 : i8) : i8
158156
// 8 bytes means it will span over the two i32 entries.
159157
%memset_len = llvm.mlir.constant(8 : i32) : i32
160158
// We expect two generated memset, one for each field.
161159
// CHECK-NOT: "llvm.intr.memset"
162-
// CHECK-DAG: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
163-
// CHECK-DAG: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
160+
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
161+
// CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
162+
// CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
163+
// CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
164+
// CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
164165
// CHECK-NOT: "llvm.intr.memset"
165166
"llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
166167
%2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
@@ -208,21 +209,21 @@ llvm.func @memset_considers_alignment() -> i32 {
208209

209210
// CHECK-LABEL: llvm.func @memset_considers_packing
210211
llvm.func @memset_considers_packing() -> i32 {
211-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
212-
// CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
213-
// CHECK-DAG: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
214-
// CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
215-
// After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
216-
// CHECK-DAG: %[[MEMSET_LEN_WHOLE:.*]] = llvm.mlir.constant(4 : i32) : i32
217-
// CHECK-DAG: %[[MEMSET_LEN_PARTIAL:.*]] = llvm.mlir.constant(3 : i32) : i32
212+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
213+
// CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
214+
// CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
215+
// CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
218216
%0 = llvm.mlir.constant(1 : i32) : i32
219217
%1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
220218
%memset_value = llvm.mlir.constant(42 : i8) : i8
221219
// 8 bytes means it will span over all the fields, because there is no padding as the struct is packed.
222220
%memset_len = llvm.mlir.constant(8 : i32) : i32
223221
// Now all fields are touched by the memset.
224222
// CHECK-NOT: "llvm.intr.memset"
223+
// After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
224+
// CHECK: %[[MEMSET_LEN_WHOLE:.*]] = llvm.mlir.constant(4 : i32) : i32
225225
// CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_WHOLE]]) <{isVolatile = false}>
226+
// CHECK: %[[MEMSET_LEN_PARTIAL:.*]] = llvm.mlir.constant(3 : i32) : i32
226227
// CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_PARTIAL]]) <{isVolatile = false}>
227228
// CHECK-NOT: "llvm.intr.memset"
228229
"llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
@@ -241,14 +242,14 @@ llvm.func @memset_considers_packing() -> i32 {
241242
// CHECK-LABEL: llvm.func @memcpy_dest
242243
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
243244
llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
244-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
245-
// CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
246-
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
247-
// CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
245+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
246+
// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
248247
%0 = llvm.mlir.constant(1 : i32) : i32
249248
%1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
250249
%memcpy_len = llvm.mlir.constant(40 : i32) : i32
251250
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
251+
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
252+
// CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
252253
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
253254
"llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
254255
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
@@ -261,24 +262,27 @@ llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
261262
// CHECK-LABEL: llvm.func @memcpy_src
262263
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
263264
llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
264-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
265+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
265266
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
266-
// CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
267267
// CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
268268
%0 = llvm.mlir.constant(1 : i32) : i32
269269
%1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
270270
%memcpy_len = llvm.mlir.constant(16 : i32) : i32
271271
// Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
272272
// We can only check that the amount of operations and allocated slots is correct, which should be sufficient
273273
// as unused slots are not generated.
274-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
275-
// CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
276-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
277-
// CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
278-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
279-
// CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
280-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
281-
// CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
274+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
275+
// CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
276+
// CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
277+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
278+
// CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
279+
// CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
280+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
281+
// CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
282+
// CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
283+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
284+
// CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
285+
// CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
282286
"llvm.intr.memcpy"(%other_array, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
283287
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
284288
%3 = llvm.load %2 : !llvm.ptr -> i32
@@ -289,14 +293,19 @@ llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
289293

290294
// CHECK-LABEL: llvm.func @memcpy_double
291295
llvm.func @memcpy_double() -> i32 {
292-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
293-
// CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
296+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
294297
%0 = llvm.mlir.constant(1 : i32) : i32
295-
// CHECK-COUNT-2: = llvm.alloca %[[ALLOCA_LEN]] x i32
298+
// CHECK: = llvm.alloca %[[ALLOCA_LEN]] x i32
299+
// TODO: This should also disappear as a GEP with all zero indices should be
300+
// ignored.
301+
// CHECK: = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<1 x i32>
296302
%1 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
297303
%2 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
304+
// Match the dead constant, to avoid collision with the newly created one.
305+
// CHECK: llvm.mlir.constant
298306
%memcpy_len = llvm.mlir.constant(4 : i32) : i32
299307
// CHECK-NOT: "llvm.intr.memcpy"
308+
// CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
300309
// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
301310
// CHECK-NOT: "llvm.intr.memcpy"
302311
"llvm.intr.memcpy"(%1, %2, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
@@ -346,14 +355,14 @@ llvm.func @memcpy_no_volatile(%other_array: !llvm.ptr) -> i32 {
346355
// CHECK-LABEL: llvm.func @memmove_dest
347356
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
348357
llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
349-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
350-
// CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
351-
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
352-
// CHECK-DAG: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
358+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
359+
// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
353360
%0 = llvm.mlir.constant(1 : i32) : i32
354361
%1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
355362
%memmove_len = llvm.mlir.constant(40 : i32) : i32
356363
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
364+
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
365+
// CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
357366
// CHECK: "llvm.intr.memmove"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMMOVE_LEN]]) <{isVolatile = false}>
358367
"llvm.intr.memmove"(%1, %other_array, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
359368
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
@@ -366,24 +375,26 @@ llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
366375
// CHECK-LABEL: llvm.func @memmove_src
367376
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
368377
llvm.func @memmove_src(%other_array: !llvm.ptr) -> i32 {
369-
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
370-
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
371-
// CHECK-DAG: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
378+
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
372379
// CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
373380
%0 = llvm.mlir.constant(1 : i32) : i32
374381
%1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
375382
%memmove_len = llvm.mlir.constant(16 : i32) : i32
376383
// Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
377384
// We can only check that the amount of operations and allocated slots is correct, which should be sufficient
378385
// as unused slots are not generated.
379-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
380-
// CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
381-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
382-
// CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
383-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
384-
// CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
385-
// CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
386-
// CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
386+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
387+
// CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
388+
// CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
389+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
390+
// CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
391+
// CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
392+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
393+
// CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
394+
// CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
395+
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
396+
// CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
397+
// CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
387398
"llvm.intr.memmove"(%other_array, %1, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
388399
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
389400
%3 = llvm.load %2 : !llvm.ptr -> i32

0 commit comments

Comments
 (0)