Skip to content

Commit 516ccce

Browse files
[mlir] Make the split markers of splitAndProcessBuffer configurable. (#84765)
This allows defining custom splitters, which is interesting for non-MLIR inputs and outputs to `mlir-translate`. For example, one may use `; -----` as a splitter of `.ll` files. The splitters are now passed as arguments into `splitAndProcessBuffer`, the input splitter defaulting to the previous default (`// -----`) and the output splitter defaulting to the empty string, which also corresponds to the previous default. The behavior of the input split marker should not change at all; however, outputs now have one new line *more* than before if there is no splitter (old: `insertMarkerInOutput = false`, new: `outputSplitMarker = ""`) and one new line *less* if there is one. The value of the input splitter is exposed as a command line option of `mlir-translate` and other tools as an optional value to the previously existing flag `-split-input-file`, which defaults to the default splitter if not specified; the value of the output splitter is exposed with the new `-output-split-marker`, which defaults to the empty string in `mlir-translate` and the default splitter in the other tools. In short, the previous usage or omission of the flags should result in previous behavior (modulo the new lines mentioned before).
1 parent 2e271ce commit 516ccce

File tree

12 files changed

+191
-59
lines changed

12 files changed

+191
-59
lines changed

mlir/include/mlir/Support/ToolUtilities.h

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
#include "mlir/Support/LLVM.h"
1717
#include "llvm/ADT/STLExtras.h"
18+
#include "llvm/ADT/StringRef.h"
19+
1820
#include <memory>
1921

2022
namespace llvm {
@@ -27,20 +29,24 @@ struct LogicalResult;
2729
using ChunkBufferHandler = function_ref<LogicalResult(
2830
std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
2931

30-
/// Splits the specified buffer on a marker (`// -----`), processes each chunk
31-
/// independently according to the normal `processChunkBuffer` logic, and writes
32-
/// all results to `os`.
32+
extern inline const char *const kDefaultSplitMarker = "// -----";
33+
34+
/// Splits the specified buffer on a marker (`// -----` by default), processes
35+
/// each chunk independently according to the normal `processChunkBuffer` logic,
36+
/// and writes all results to `os`.
3337
///
3438
/// This is used to allow a large number of small independent tests to be put
35-
/// into a single file. `enableSplitting` can be used to toggle if splitting
36-
/// should be enabled, e.g. to allow for merging split and non-split code paths.
37-
/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
38-
/// between each of the processed output chunks.
39+
/// into a single file. The input split marker is configurable. If it is empty,
40+
/// splitting is disabled, which allows for merging split and non-split code
41+
/// paths. Output split markers (empty by default) followed by a new line
42+
/// character, respectively, are placed between each of the processed output
43+
/// chunks. (The new line character is inserted even if the split marker is
44+
/// empty.)
3945
LogicalResult
4046
splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
4147
ChunkBufferHandler processChunkBuffer, raw_ostream &os,
42-
bool enableSplitting = true,
43-
bool insertMarkerInOutput = false);
48+
llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
49+
llvm::StringRef outputSplitMarker = "");
4450
} // namespace mlir
4551

4652
#endif // MLIR_SUPPORT_TOOLUTILITIES_H

mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "mlir/Debug/CLOptionsSetup.h"
1717
#include "mlir/Support/LogicalResult.h"
18+
#include "mlir/Support/ToolUtilities.h"
1819
#include "llvm/ADT/StringRef.h"
1920

2021
#include <cstdlib>
@@ -136,13 +137,24 @@ class MlirOptMainConfig {
136137
}
137138
bool shouldShowDialects() const { return showDialectsFlag; }
138139

139-
/// Set whether to split the input file based on the `// -----` marker into
140-
/// pieces and process each chunk independently.
141-
MlirOptMainConfig &splitInputFile(bool split = true) {
142-
splitInputFileFlag = split;
140+
/// Set the marker on which to split the input into chunks and process each
141+
/// chunk independently. Input is not split if empty.
142+
MlirOptMainConfig &
143+
splitInputFile(std::string splitMarker = kDefaultSplitMarker) {
144+
splitInputFileFlag = std::move(splitMarker);
145+
return *this;
146+
}
147+
/// Returns true if the input should be split into chunks, i.e. if a
/// non-empty input split marker has been set. An empty marker means the
/// input is processed as a single buffer.
bool shouldSplitInputFile() const { return !splitInputFileFlag.empty(); }
148+
StringRef inputSplitMarker() const { return splitInputFileFlag; }
149+
150+
/// Set whether to merge the output chunks into one file using the given
151+
/// marker.
152+
MlirOptMainConfig &
153+
outputSplitMarker(std::string splitMarker = kDefaultSplitMarker) {
154+
outputSplitMarkerFlag = std::move(splitMarker);
143155
return *this;
144156
}
145-
bool shouldSplitInputFile() const { return splitInputFileFlag; }
157+
StringRef outputSplitMarker() const { return outputSplitMarkerFlag; }
146158

147159
/// Disable implicit addition of a top-level module op during parsing.
148160
MlirOptMainConfig &useExplicitModule(bool useExplicitModule) {
@@ -215,9 +227,12 @@ class MlirOptMainConfig {
215227
/// Show the registered dialects before trying to load the input file.
216228
bool showDialectsFlag = false;
217229

218-
/// Split the input file based on the `// -----` marker into pieces and
219-
/// process each chunk independently.
220-
bool splitInputFileFlag = false;
230+
/// Split the input file based on the given marker into chunks and process
231+
/// each chunk independently. Input is not split if empty.
232+
std::string splitInputFileFlag = "";
233+
234+
/// Merge output chunks into one file using the given marker.
235+
std::string outputSplitMarkerFlag = "";
221236

222237
/// Use an explicit top-level module op during parsing.
223238
bool useExplicitModuleFlag = false;

mlir/lib/Support/ToolUtilities.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,20 @@ using namespace mlir;
2121
LogicalResult
2222
mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
2323
ChunkBufferHandler processChunkBuffer,
24-
raw_ostream &os, bool enableSplitting,
25-
bool insertMarkerInOutput) {
24+
raw_ostream &os, llvm::StringRef inputSplitMarker,
25+
llvm::StringRef outputSplitMarker) {
2626
// If splitting is disabled, we process the full input buffer.
27-
if (!enableSplitting)
27+
if (inputSplitMarker.empty())
2828
return processChunkBuffer(std::move(originalBuffer), os);
2929

30-
const char splitMarkerConst[] = "// -----";
31-
StringRef splitMarker(splitMarkerConst);
32-
const int splitMarkerLen = splitMarker.size();
30+
const int inputSplitMarkerLen = inputSplitMarker.size();
3331

3432
auto *origMemBuffer = originalBuffer.get();
3533
SmallVector<StringRef, 8> rawSourceBuffers;
3634
const int checkLen = 2;
3735
// Split dropping the last checkLen chars to enable flagging near misses.
3836
origMemBuffer->getBuffer().split(rawSourceBuffers,
39-
splitMarker.drop_back(checkLen));
37+
inputSplitMarker.drop_back(checkLen));
4038
if (rawSourceBuffers.empty())
4139
return success();
4240

@@ -58,8 +56,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
5856
}
5957

6058
// Check that suffix is as expected and doesn't have any dash post.
61-
bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
62-
buffer.size() > checkLen && buffer[checkLen] != '0';
59+
bool expectedSuffix =
60+
buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
61+
buffer.size() > checkLen && buffer[checkLen] != '0';
6362
if (expectedSuffix) {
6463
sourceBuffers.push_back(prev);
6564
prev = buffer.drop_front(checkLen);
@@ -69,8 +68,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
6968
fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
7069
llvm::SourceMgr::DK_Warning,
7170
"near miss with file split marker");
72-
prev = StringRef(prev.data(),
73-
prev.size() + splitMarkerLen - checkLen + buffer.size());
71+
prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
72+
checkLen + buffer.size());
7473
}
7574
}
7675
if (!prev.empty())
@@ -89,7 +88,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
8988
hadFailure = true;
9089
};
9190
llvm::interleave(sourceBuffers, os, interleaveFn,
92-
insertMarkerInOutput ? "\n// -----\n" : "");
91+
(llvm::Twine(outputSplitMarker) + "\n").str());
9392

9493
// If any fails, then return a failure of the tool.
9594
return failure(hadFailure);

mlir/lib/Tools/lsp-server-support/Transport.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "mlir/Tools/lsp-server-support/Transport.h"
10+
#include "mlir/Support/ToolUtilities.h"
1011
#include "mlir/Tools/lsp-server-support/Logging.h"
1112
#include "mlir/Tools/lsp-server-support/Protocol.h"
1213
#include "llvm/ADT/SmallString.h"
@@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
347348
StringRef lineRef = line.str().trim();
348349
if (lineRef.starts_with("//")) {
349350
// Found a delimiter for the message.
350-
if (lineRef == "// -----")
351+
if (lineRef == kDefaultSplitMarker)
351352
break;
352353
continue;
353354
}

mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "mlir/IR/Operation.h"
1616
#include "mlir/Interfaces/FunctionInterfaces.h"
1717
#include "mlir/Parser/Parser.h"
18+
#include "mlir/Support/ToolUtilities.h"
1819
#include "mlir/Tools/lsp-server-support/Logging.h"
1920
#include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
2021
#include "llvm/ADT/StringExtras.h"
@@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
10521053
context.allowUnregisteredDialects();
10531054

10541055
// Split the file into separate MLIR documents.
1055-
// TODO: Find a way to share the split file marker with other tools. We don't
1056-
// want to use `splitAndProcessBuffer` here, but we do want to make sure this
1057-
// marker doesn't go out of sync.
10581056
SmallVector<StringRef, 8> subContents;
1059-
StringRef(contents).split(subContents, "// -----");
1057+
StringRef(contents).split(subContents, kDefaultSplitMarker);
10601058
chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
10611059
context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));
10621060

mlir/lib/Tools/mlir-opt/MlirOptMain.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,21 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
127127
cl::desc("Print the list of registered dialects and exit"),
128128
cl::location(showDialectsFlag), cl::init(false));
129129

130-
static cl::opt<bool, /*ExternalStorage=*/true> splitInputFile(
131-
"split-input-file",
132-
cl::desc("Split the input file into pieces and process each "
133-
"chunk independently"),
134-
cl::location(splitInputFileFlag), cl::init(false));
130+
static cl::opt<std::string, /*ExternalStorage=*/true> splitInputFile(
131+
"split-input-file", llvm::cl::ValueOptional,
132+
cl::callback([&](const std::string &str) {
133+
// Implicit value: use default marker if flag was used without value.
134+
if (str.empty())
135+
splitInputFile.setValue(kDefaultSplitMarker);
136+
}),
137+
cl::desc("Split the input file into chunks using the given or "
138+
"default marker and process each chunk independently"),
139+
cl::location(splitInputFileFlag), cl::init(""));
140+
141+
static cl::opt<std::string, /*ExternalStorage=*/true> outputSplitMarker(
142+
"output-split-marker",
143+
cl::desc("Split marker to use for merging the ouput"),
144+
cl::location(outputSplitMarkerFlag), cl::init(kDefaultSplitMarker));
135145

136146
static cl::opt<bool, /*ExternalStorage=*/true> verifyDiagnostics(
137147
"verify-diagnostics",
@@ -533,8 +543,8 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
533543
threadPool);
534544
};
535545
return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
536-
config.shouldSplitInputFile(),
537-
/*insertMarkerInOutput=*/true);
546+
config.inputSplitMarker(),
547+
config.outputSplitMarker());
538548
}
539549

540550
LogicalResult mlir::MlirOptMain(int argc, char **argv,

mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "Protocol.h"
1212
#include "mlir/IR/BuiltinOps.h"
13+
#include "mlir/Support/ToolUtilities.h"
1314
#include "mlir/Tools/PDLL/AST/Context.h"
1415
#include "mlir/Tools/PDLL/AST/Nodes.h"
1516
#include "mlir/Tools/PDLL/AST/Types.h"
@@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
16211622
[&](PDLTextFileChunk &chunk) {
16221623
chunk.document.getPDLLViewOutput(outputOS, kind);
16231624
},
1624-
[&] { outputOS << "\n// -----\n\n"; });
1625+
[&] { outputOS << "\n"
1626+
<< kDefaultSplitMarker << "\n\n"; });
16251627
}
16261628
return result;
16271629
}
@@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
16321634
chunks.clear();
16331635

16341636
// Split the file into separate PDL documents.
1635-
// TODO: Find a way to share the split file marker with other tools. We don't
1636-
// want to use `splitAndProcessBuffer` here, but we do want to make sure this
1637-
// marker doesn't go out of sync.
16381637
SmallVector<StringRef, 8> subContents;
1639-
StringRef(contents).split(subContents, "// -----");
1638+
StringRef(contents).split(subContents, kDefaultSplitMarker);
16401639
chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
16411640
/*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
16421641
diagnostics));

mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,16 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
6262
llvm::cl::desc("Allow operation with no registered dialects (discouraged: testing only!)"),
6363
llvm::cl::init(false));
6464

65-
static llvm::cl::opt<bool> splitInputFile(
66-
"split-input-file",
67-
llvm::cl::desc("Split the input file into pieces and "
68-
"process each chunk independently"),
69-
llvm::cl::init(false));
65+
static llvm::cl::opt<std::string> inputSplitMarker(
66+
"split-input-file", llvm::cl::ValueOptional,
67+
llvm::cl::callback([&](const std::string &str) {
68+
// Implicit value: use default marker if flag was used without value.
69+
if (str.empty())
70+
inputSplitMarker.setValue(kDefaultSplitMarker);
71+
}),
72+
llvm::cl::desc("Split the input file into chunks using the given or "
73+
"default marker and process each chunk independently"),
74+
llvm::cl::init(""));
7075

7176
static llvm::cl::opt<bool> verifyDiagnostics(
7277
"verify-diagnostics",
@@ -80,6 +85,11 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
8085
"(discouraged: testing only!)"),
8186
llvm::cl::init(false));
8287

88+
static llvm::cl::opt<std::string> outputSplitMarker(
89+
"output-split-marker",
90+
llvm::cl::desc("Split marker to use for merging the ouput"),
91+
llvm::cl::init(""));
92+
8393
llvm::InitLLVM y(argc, argv);
8494

8595
// Add flags for all the registered translations.
@@ -176,7 +186,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
176186
};
177187

178188
if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
179-
output->os(), splitInputFile)))
189+
output->os(), inputSplitMarker,
190+
outputSplitMarker)))
180191
return failure();
181192

182193
output->keep();

mlir/test/mlir-opt/nearmiss.mlir renamed to mlir/test/mlir-opt/split-markers.mlir

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1-
// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t && FileCheck --input-file %t %s
1+
// Check near-miss mechanics:
2+
// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t \
3+
// RUN: && FileCheck --input-file %t %s
24
// RUN: cat %t
35

6+
// Check that (1) custom input splitter and (2) custom output splitters work.
7+
// RUN: mlir-opt %s -split-input-file="// CHECK: ""----" \
8+
// RUN: -output-split-marker="// ---- next split ----" \
9+
// RUN: | FileCheck -input-file %s -check-prefix=CHECK-SPLITTERS %s
10+
411
func.func @main() {return}
512

613
// -----
@@ -20,3 +27,9 @@ func.func @bar2() {return }
2027

2128
// No error flagged at the end for a near miss.
2229
// ----
30+
31+
// CHECK-SPLITTERS: module
32+
// CHECK-SPLITTERS: ---- next split ----
33+
// CHECK-SPLITTERS: module
34+
// CHECK-SPLITTERS: ---- next split ----
35+
// CHECK-SPLITTERS: module
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Check that (1) the default input split marker is used if no custom marker is
2+
// specified and (2) the output file is merged using the default marker.
3+
// RUN: mlir-pdll %s -split-input-file \
4+
// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s
5+
6+
// Check that the custom (3) input and (4) output split markers are used if
7+
// provided.
8+
// RUN: mlir-pdll %s \
9+
// RUN: -split-input-file="// ""=====" -output-split-marker "// #####" \
10+
// RUN: | FileCheck -check-prefix=CHECK-CUSTOM %s
11+
12+
// CHECK-DEFAULT: Module
13+
// CHECK-DEFAULT-NEXT: PatternDecl
14+
// CHECK-DEFAULT-NOT: PatternDecl
15+
// CHECK-DEFAULT: //{{ }}-----
16+
// CHECK-DEFAULT-NEXT: Module
17+
// CHECK-DEFAULT-NEXT: PatternDecl
18+
// CHECK-DEFAULT: PatternDecl
19+
20+
// CHECK-CUSTOM: Module
21+
// CHECK-CUSTOM-NEXT: PatternDecl
22+
// CHECK-CUSTOM: PatternDecl
23+
// CHECK-CUSTOM: // #####
24+
// CHECK-CUSTOM-NEXT: Module
25+
// CHECK-CUSTOM-NEXT: PatternDecl
26+
// CHECK-CUSTOM-NOT: PatternDecl
27+
28+
Pattern => erase op<test.op>;
29+
30+
// -----
31+
32+
Pattern => erase op<test.op2>;
33+
34+
// =====
35+
36+
Pattern => erase op<test.op3>;

0 commit comments

Comments
 (0)