Skip to content

Commit c8ca486

Browse files
authored
[MLIR] print/parse resource handle key quoted and escaped (llvm#119746)
resource keys have the problem that you can’t parse them from mlir assembly if they have special or non-printable characters, but nothing prevents you from specifying such a key when you create e.g. a DenseResourceElementsAttr, and it works fine in other ways, including bytecode emission and parsing this PR solves the parsing by quoting and escaping keys with special or non-printable characters in mlir assembly, in the same way as symbols, e.g.: ``` module attributes { fst = dense_resource<resource_fst> : tensor<2xf16>, snd = dense_resource<"resource\09snd"> : tensor<2xf16> } {} {-# dialect_resources: { builtin: { resource_fst: "0x0200000001000200", "resource\09snd": "0x0200000008000900" } } #-} ``` by not quoting keys without special or non-printable characters, the change is effectively backwards compatible the change is tested by: 1. adding a test with a dense resource handle key with special characters to `dense-resource-elements-attr.mlir` 2. adding special and unprintable characters to some resource keys in the existing lit tests `pretty-resources-print.mlir` and `mlir/test/Bytecode/resources.mlir`
1 parent d13940e commit c8ca486

File tree

8 files changed

+96
-71
lines changed

8 files changed

+96
-71
lines changed

mlir/include/mlir/IR/OpImplementation.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ class AsmPrinter {
202202
/// special or non-printable characters in it.
203203
virtual void printSymbolName(StringRef symbolRef);
204204

205-
/// Print a handle to the given dialect resource.
205+
/// Print a handle to the given dialect resource. The handle key is quoted and
206+
/// escaped if it has any special or non-printable characters in it.
206207
virtual void printResourceHandle(const AsmDialectResourceHandle &resource);
207208

208209
/// Print an optional arrow followed by a type list.

mlir/lib/AsmParser/AsmParserImpl.h

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,7 @@ class AsmParserImpl : public BaseT {
248248

249249
/// Parses a quoted string token if present.
250250
ParseResult parseOptionalString(std::string *string) override {
251-
if (!parser.getToken().is(Token::string))
252-
return failure();
253-
254-
if (string)
255-
*string = parser.getToken().getStringValue();
256-
parser.consumeToken();
257-
return success();
251+
return parser.parseOptionalString(string);
258252
}
259253

260254
/// Parses a Base64 encoded string of bytes.
@@ -355,13 +349,7 @@ class AsmParserImpl : public BaseT {
355349

356350
/// Parse a keyword, if present, into 'keyword'.
357351
ParseResult parseOptionalKeyword(StringRef *keyword) override {
358-
// Check that the current token is a keyword.
359-
if (!parser.isCurrentTokenAKeyword())
360-
return failure();
361-
362-
*keyword = parser.getTokenSpelling();
363-
parser.consumeToken();
364-
return success();
352+
return parser.parseOptionalKeyword(keyword);
365353
}
366354

367355
/// Parse a keyword if it is one of the 'allowedKeywords'.
@@ -387,13 +375,7 @@ class AsmParserImpl : public BaseT {
387375

388376
/// Parse an optional keyword or string and set instance into 'result'.`
389377
ParseResult parseOptionalKeywordOrString(std::string *result) override {
390-
StringRef keyword;
391-
if (succeeded(parseOptionalKeyword(&keyword))) {
392-
*result = keyword.str();
393-
return success();
394-
}
395-
396-
return parseOptionalString(result);
378+
return parser.parseOptionalKeywordOrString(result);
397379
}
398380

399381
//===--------------------------------------------------------------------===//
@@ -514,7 +496,7 @@ class AsmParserImpl : public BaseT {
514496
return parser.emitError() << "dialect '" << dialect->getNamespace()
515497
<< "' does not expect resource handles";
516498
}
517-
StringRef resourceName;
499+
std::string resourceName;
518500
return parser.parseResourceHandle(interface, resourceName);
519501
}
520502

mlir/lib/AsmParser/Parser.cpp

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,17 @@ ParseResult Parser::parseToken(Token::Kind expectedToken,
271271
return emitWrongTokenError(message);
272272
}
273273

274+
/// Parses a quoted string token if present.
275+
ParseResult Parser::parseOptionalString(std::string *string) {
276+
if (!getToken().is(Token::string))
277+
return failure();
278+
279+
if (string)
280+
*string = getToken().getStringValue();
281+
consumeToken();
282+
return success();
283+
}
284+
274285
/// Parse an optional integer value from the stream.
275286
OptionalParseResult Parser::parseOptionalInteger(APInt &result) {
276287
// Parse `false` and `true` keywords as 0 and 1 respectively.
@@ -412,15 +423,25 @@ ParseResult Parser::parseOptionalKeyword(StringRef *keyword) {
412423
return success();
413424
}
414425

426+
ParseResult Parser::parseOptionalKeywordOrString(std::string *result) {
427+
StringRef keyword;
428+
if (succeeded(parseOptionalKeyword(&keyword))) {
429+
*result = keyword.str();
430+
return success();
431+
}
432+
433+
return parseOptionalString(result);
434+
}
435+
415436
//===----------------------------------------------------------------------===//
416437
// Resource Parsing
417438

418439
FailureOr<AsmDialectResourceHandle>
419440
Parser::parseResourceHandle(const OpAsmDialectInterface *dialect,
420-
StringRef &name) {
441+
std::string &name) {
421442
assert(dialect && "expected valid dialect interface");
422443
SMLoc nameLoc = getToken().getLoc();
423-
if (failed(parseOptionalKeyword(&name)))
444+
if (failed(parseOptionalKeywordOrString(&name)))
424445
return emitError("expected identifier key for 'resource' entry");
425446
auto &resources = getState().symbols.dialectResources;
426447

@@ -451,7 +472,7 @@ Parser::parseResourceHandle(Dialect *dialect) {
451472
return emitError() << "dialect '" << dialect->getNamespace()
452473
<< "' does not expect resource handles";
453474
}
454-
StringRef resourceName;
475+
std::string resourceName;
455476
return parseResourceHandle(interface, resourceName);
456477
}
457478

@@ -2530,8 +2551,8 @@ class TopLevelOperationParser : public Parser {
25302551
/// textual format.
25312552
class ParsedResourceEntry : public AsmParsedResourceEntry {
25322553
public:
2533-
ParsedResourceEntry(StringRef key, SMLoc keyLoc, Token value, Parser &p)
2534-
: key(key), keyLoc(keyLoc), value(value), p(p) {}
2554+
ParsedResourceEntry(std::string key, SMLoc keyLoc, Token value, Parser &p)
2555+
: key(std::move(key)), keyLoc(keyLoc), value(value), p(p) {}
25352556
~ParsedResourceEntry() override = default;
25362557

25372558
StringRef getKey() const final { return key; }
@@ -2607,7 +2628,7 @@ class ParsedResourceEntry : public AsmParsedResourceEntry {
26072628
}
26082629

26092630
private:
2610-
StringRef key;
2631+
std::string key;
26112632
SMLoc keyLoc;
26122633
Token value;
26132634
Parser &p;
@@ -2736,7 +2757,7 @@ ParseResult TopLevelOperationParser::parseDialectResourceFileMetadata() {
27362757
return parseCommaSeparatedListUntil(Token::r_brace, [&]() -> ParseResult {
27372758
// Parse the name of the resource entry.
27382759
SMLoc keyLoc = getToken().getLoc();
2739-
StringRef key;
2760+
std::string key;
27402761
if (failed(parseResourceHandle(handler, key)) ||
27412762
parseToken(Token::colon, "expected ':'"))
27422763
return failure();
@@ -2763,8 +2784,8 @@ ParseResult TopLevelOperationParser::parseExternalResourceFileMetadata() {
27632784
return parseCommaSeparatedListUntil(Token::r_brace, [&]() -> ParseResult {
27642785
// Parse the name of the resource entry.
27652786
SMLoc keyLoc = getToken().getLoc();
2766-
StringRef key;
2767-
if (failed(parseOptionalKeyword(&key)))
2787+
std::string key;
2788+
if (failed(parseOptionalKeywordOrString(&key)))
27682789
return emitError(
27692790
"expected identifier key for 'external_resources' entry");
27702791
if (parseToken(Token::colon, "expected ':'"))

mlir/lib/AsmParser/Parser.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ class Parser {
146146
/// output a diagnostic and return failure.
147147
ParseResult parseToken(Token::Kind expectedToken, const Twine &message);
148148

149+
/// Parses a quoted string token if present.
150+
ParseResult parseOptionalString(std::string *string);
151+
149152
/// Parse an optional integer value from the stream.
150153
OptionalParseResult parseOptionalInteger(APInt &result);
151154

@@ -171,13 +174,16 @@ class Parser {
171174
/// Parse a keyword, if present, into 'keyword'.
172175
ParseResult parseOptionalKeyword(StringRef *keyword);
173176

177+
/// Parse an optional keyword or string and set instance into 'result'.`
178+
ParseResult parseOptionalKeywordOrString(std::string *result);
179+
174180
//===--------------------------------------------------------------------===//
175181
// Resource Parsing
176182
//===--------------------------------------------------------------------===//
177183

178184
/// Parse a handle to a dialect resource within the assembly format.
179185
FailureOr<AsmDialectResourceHandle>
180-
parseResourceHandle(const OpAsmDialectInterface *dialect, StringRef &name);
186+
parseResourceHandle(const OpAsmDialectInterface *dialect, std::string &name);
181187
FailureOr<AsmDialectResourceHandle> parseResourceHandle(Dialect *dialect);
182188

183189
//===--------------------------------------------------------------------===//

mlir/lib/IR/AsmPrinter.cpp

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2188,13 +2188,6 @@ void AsmPrinter::Impl::printLocation(LocationAttr loc, bool allowAlias) {
21882188
os << ')';
21892189
}
21902190

2191-
void AsmPrinter::Impl::printResourceHandle(
2192-
const AsmDialectResourceHandle &resource) {
2193-
auto *interface = cast<OpAsmDialectInterface>(resource.getDialect());
2194-
os << interface->getResourceKey(resource);
2195-
state.getDialectResources()[resource.getDialect()].insert(resource);
2196-
}
2197-
21982191
/// Returns true if the given dialect symbol data is simple enough to print in
21992192
/// the pretty form. This is essentially when the symbol takes the form:
22002193
/// identifier (`<` body `>`)?
@@ -2279,6 +2272,13 @@ static void printElidedElementsAttr(raw_ostream &os) {
22792272
os << R"(dense_resource<__elided__>)";
22802273
}
22812274

2275+
void AsmPrinter::Impl::printResourceHandle(
2276+
const AsmDialectResourceHandle &resource) {
2277+
auto *interface = cast<OpAsmDialectInterface>(resource.getDialect());
2278+
::printKeywordOrString(interface->getResourceKey(resource), os);
2279+
state.getDialectResources()[resource.getDialect()].insert(resource);
2280+
}
2281+
22822282
LogicalResult AsmPrinter::Impl::printAlias(Attribute attr) {
22832283
return state.getAliasState().getAlias(attr, os);
22842284
}
@@ -3373,41 +3373,41 @@ void OperationPrinter::printResourceFileMetadata(
33733373
auto printFn = [&](StringRef key, ResourceBuilder::ValueFn valueFn) {
33743374
checkAddMetadataDict();
33753375

3376-
auto printFormatting = [&]() {
3377-
// Emit the top-level resource entry if we haven't yet.
3378-
if (!std::exchange(hadResource, true)) {
3379-
if (needResourceComma)
3380-
os << "," << newLine;
3381-
os << " " << dictName << "_resources: {" << newLine;
3382-
}
3383-
// Emit the parent resource entry if we haven't yet.
3384-
if (!std::exchange(hadEntry, true)) {
3385-
if (needEntryComma)
3386-
os << "," << newLine;
3387-
os << " " << name << ": {" << newLine;
3388-
} else {
3389-
os << "," << newLine;
3390-
}
3391-
};
3392-
3376+
std::string resourceStr;
3377+
auto printResourceStr = [&](raw_ostream &os) { os << resourceStr; };
33933378
std::optional<uint64_t> charLimit =
33943379
printerFlags.getLargeResourceStringLimit();
33953380
if (charLimit.has_value()) {
3396-
std::string resourceStr;
33973381
llvm::raw_string_ostream ss(resourceStr);
33983382
valueFn(ss);
33993383

3400-
// Only print entry if it's string is small enough
3384+
// Only print entry if its string is small enough.
34013385
if (resourceStr.size() > charLimit.value())
34023386
return;
34033387

3404-
printFormatting();
3405-
os << " " << key << ": " << resourceStr;
3388+
// Don't recompute resourceStr when valueFn is called below.
3389+
valueFn = printResourceStr;
3390+
}
3391+
3392+
// Emit the top-level resource entry if we haven't yet.
3393+
if (!std::exchange(hadResource, true)) {
3394+
if (needResourceComma)
3395+
os << "," << newLine;
3396+
os << " " << dictName << "_resources: {" << newLine;
3397+
}
3398+
// Emit the parent resource entry if we haven't yet.
3399+
if (!std::exchange(hadEntry, true)) {
3400+
if (needEntryComma)
3401+
os << "," << newLine;
3402+
os << " " << name << ": {" << newLine;
34063403
} else {
3407-
printFormatting();
3408-
os << " " << key << ": ";
3409-
valueFn(os);
3404+
os << "," << newLine;
34103405
}
3406+
os << " ";
3407+
::printKeywordOrString(key, os);
3408+
os << ": ";
3409+
// Call printResourceStr or original valueFn, depending on charLimit.
3410+
valueFn(os);
34113411
};
34123412
ResourceBuilder entryBuilder(printFn);
34133413
provider.buildResources(op, providerArgs..., entryBuilder);

mlir/test/Bytecode/resources.mlir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,21 @@
44
module @TestDialectResources attributes {
55
// CHECK: bytecode.test = dense_resource<decl_resource> : tensor<2xui32>
66
// CHECK: bytecode.test2 = dense_resource<resource> : tensor<4xf64>
7-
// CHECK: bytecode.test3 = dense_resource<resource_2> : tensor<4xf64>
7+
// CHECK: bytecode.test3 = dense_resource<"resource\09two"> : tensor<4xf64>
88
bytecode.test = dense_resource<decl_resource> : tensor<2xui32>,
99
bytecode.test2 = dense_resource<resource> : tensor<4xf64>,
10-
bytecode.test3 = dense_resource<resource_2> : tensor<4xf64>
10+
bytecode.test3 = dense_resource<"resource\09two"> : tensor<4xf64>
1111
} {}
1212

1313
// CHECK: builtin: {
1414
// CHECK-NEXT: resource: "0x08000000010000000000000002000000000000000300000000000000"
15-
// CHECK-NEXT: resource_2: "0x08000000010000000000000002000000000000000300000000000000"
15+
// CHECK-NEXT: "resource\09two": "0x08000000010000000000000002000000000000000300000000000000"
1616

1717
{-#
1818
dialect_resources: {
1919
builtin: {
2020
resource: "0x08000000010000000000000002000000000000000300000000000000",
21-
resource_2: "0x08000000010000000000000002000000000000000300000000000000"
21+
"resource\09two": "0x08000000010000000000000002000000000000000300000000000000"
2222
}
2323
}
2424
#-}

mlir/test/IR/dense-resource-elements-attr.mlir

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,18 @@
1111
}
1212
}
1313
#-}
14+
15+
// -----
16+
17+
// DenseResourceElementsHandle key blob\-"one" is quoted and escaped.
18+
// CHECK: attr = dense_resource<"blob\\-\22one\22"> : tensor<2xi16>
19+
"test.user_op"() {attr = dense_resource<"blob\\-\22one\22"> : tensor<2xi16>} : () -> ()
20+
21+
{-#
22+
dialect_resources: {
23+
builtin: {
24+
// CHECK: "blob\\-\22one\22": "0x0200000001000200"
25+
"blob\\-\22one\22": "0x0200000001000200"
26+
}
27+
}
28+
#-}

mlir/test/IR/pretty-resources-print.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// CHECK: {-#
1313
// CHECK-NEXT: external_resources: {
1414
// CHECK-NEXT: external: {
15-
// CHECK-NEXT: bool: true,
15+
// CHECK-NEXT: "backslash\\tab\09": true,
1616
// CHECK-NEXT: string: "\22string\22"
1717
// CHECK-NEXT: },
1818
// CHECK-NEXT: other_stuff: {
@@ -31,8 +31,8 @@
3131
external_resources: {
3232
external: {
3333
blob: "0x08000000010000000000000002000000000000000300000000000000",
34-
bool: true,
35-
string: "\"string\"" // with escape characters
34+
"backslash\\tab\09": true, // quoted key with escape characters
35+
string: "\"string\"" // string with escape characters
3636
},
3737
other_stuff: {
3838
bool: true

0 commit comments

Comments
 (0)