Skip to content

[Remarks] Auto-detect remark parser format #144554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/include/llvm/Remarks/RemarkFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ namespace remarks {
constexpr StringLiteral Magic("REMARKS");

/// The format used for serializing/deserializing remarks.
enum class Format { Unknown, YAML, Bitstream };
enum class Format { Unknown, Auto, YAML, Bitstream };

/// Parse and validate a string for the remark format.
LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);

/// Parse and validate a magic number to a remark format.
LLVM_ABI Expected<Format> magicToFormat(StringRef Magic);

/// Determine which remark format to parse with, given the format \p Selected
/// by the caller and the input \p Magic whose magic number is inspected.
///
/// Returns an error for Format::Unknown and returns any other explicitly
/// selected format unchanged. For Format::Auto, empty input is treated as
/// Format::Bitstream and non-empty input is classified via magicToFormat(),
/// whose error is propagated when the magic number is not recognized.
LLVM_ABI Expected<Format> detectFormat(Format Selected, StringRef Magic);

} // end namespace remarks
} // end namespace llvm

Expand Down
5 changes: 2 additions & 3 deletions llvm/include/llvm/Remarks/RemarkLinker.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,12 @@ struct RemarkLinker {
/// \p Buffer.
/// \p Buffer can be either a standalone remark container or just
/// metadata. This takes care of uniquing and merging the remarks.
LLVM_ABI Error link(StringRef Buffer,
std::optional<Format> RemarkFormat = std::nullopt);
LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto);

/// Link the remarks found in \p Obj by looking for the right section and
/// calling the method above.
LLVM_ABI Error link(const object::ObjectFile &Obj,
std::optional<Format> RemarkFormat = std::nullopt);
Format RemarkFormat = Format::Auto);

/// Serialize the linked remarks to the stream \p OS, using the format \p
/// RemarkFormat.
Expand Down
18 changes: 17 additions & 1 deletion llvm/lib/Remarks/RemarkFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,22 @@ Expected<Format> llvm::remarks::magicToFormat(StringRef MagicStr) {

if (Result == Format::Unknown)
return createStringError(std::make_error_code(std::errc::invalid_argument),
"Unknown remark magic: '%s'", MagicStr.data());
"Automatic detection of remark format failed. "
"Unknown magic number: '%.4s'",
MagicStr.data());
return Result;
}

/// Resolve the remark format a parser should use for a buffer.
///
/// Only Format::Auto consults \p MagicStr: an empty buffer is reported as
/// bitstream, anything else is classified by magicToFormat(). An explicitly
/// selected format is handed back untouched, except Format::Unknown, which is
/// rejected with an invalid_argument error.
Expected<Format> llvm::remarks::detectFormat(Format Selected,
                                             StringRef MagicStr) {
  if (Selected == Format::Auto) {
    // Empty files are accepted as valid bitstream files.
    if (MagicStr.empty())
      return Format::Bitstream;
    return magicToFormat(MagicStr);
  }

  // Explicit selection: no sniffing of the buffer, only reject the one
  // selection that does not name a real format.
  if (Selected == Format::Unknown)
    return createStringError(std::make_error_code(std::errc::invalid_argument),
                             "Unknown remark parser format.");
  return Selected;
}
14 changes: 3 additions & 11 deletions llvm/lib/Remarks/RemarkLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,10 @@ void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) {
PrependPath = std::string(PrependPathIn);
}

Error RemarkLinker::link(StringRef Buffer, std::optional<Format> RemarkFormat) {
if (!RemarkFormat) {
Expected<Format> ParserFormat = magicToFormat(Buffer);
if (!ParserFormat)
return ParserFormat.takeError();
RemarkFormat = *ParserFormat;
}

Error RemarkLinker::link(StringRef Buffer, Format RemarkFormat) {
Expected<std::unique_ptr<RemarkParser>> MaybeParser =
createRemarkParserFromMeta(
*RemarkFormat, Buffer,
RemarkFormat, Buffer,
PrependPath ? std::optional<StringRef>(StringRef(*PrependPath))
: std::optional<StringRef>());
if (!MaybeParser)
Expand All @@ -102,8 +95,7 @@ Error RemarkLinker::link(StringRef Buffer, std::optional<Format> RemarkFormat) {
return Error::success();
}

Error RemarkLinker::link(const object::ObjectFile &Obj,
std::optional<Format> RemarkFormat) {
Error RemarkLinker::link(const object::ObjectFile &Obj, Format RemarkFormat) {
Expected<std::optional<StringRef>> SectionOrErr =
getRemarksSectionContents(Obj);
if (!SectionOrErr)
Expand Down
21 changes: 15 additions & 6 deletions llvm/lib/Remarks/RemarkParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "BitstreamRemarkParser.h"
#include "YAMLRemarkParser.h"
#include "llvm-c/Remarks.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Support/CBindingWrapping.h"
#include <optional>

Expand Down Expand Up @@ -50,14 +51,18 @@ Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {

Expected<std::unique_ptr<RemarkParser>>
llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) {
switch (ParserFormat) {
auto DetectedFormat = detectFormat(ParserFormat, Buf);
if (!DetectedFormat)
return DetectedFormat.takeError();

switch (*DetectedFormat) {
case Format::YAML:
return std::make_unique<YAMLRemarkParser>(Buf);
case Format::Bitstream:
return std::make_unique<BitstreamRemarkParser>(Buf);
case Format::Unknown:
return createStringError(std::make_error_code(std::errc::invalid_argument),
"Unknown remark parser format.");
case Format::Auto:
break;
}
llvm_unreachable("unhandled ParseFormat");
}
Expand All @@ -66,15 +71,19 @@ Expected<std::unique_ptr<RemarkParser>>
llvm::remarks::createRemarkParserFromMeta(
Format ParserFormat, StringRef Buf,
std::optional<StringRef> ExternalFilePrependPath) {
switch (ParserFormat) {
auto DetectedFormat = detectFormat(ParserFormat, Buf);
if (!DetectedFormat)
return DetectedFormat.takeError();

switch (*DetectedFormat) {
case Format::YAML:
return createYAMLParserFromMeta(Buf, std::move(ExternalFilePrependPath));
case Format::Bitstream:
return createBitstreamParserFromMeta(Buf,
std::move(ExternalFilePrependPath));
case Format::Unknown:
return createStringError(std::make_error_code(std::errc::invalid_argument),
"Unknown remark parser format.");
case Format::Auto:
break;
}
llvm_unreachable("unhandled ParseFormat");
}
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Remarks/RemarkSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
raw_ostream &OS) {
switch (RemarksFormat) {
case Format::Unknown:
case Format::Auto:
return createStringError(std::errc::invalid_argument,
"Unknown remark serializer format.");
"Invalid remark serializer format.");
case Format::YAML:
return std::make_unique<YAMLRemarkSerializer>(OS, Mode);
case Format::Bitstream:
Expand All @@ -37,8 +38,9 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
raw_ostream &OS, remarks::StringTable StrTab) {
switch (RemarksFormat) {
case Format::Unknown:
case Format::Auto:
return createStringError(std::errc::invalid_argument,
"Unknown remark serializer format.");
"Invalid remark serializer format.");
case Format::YAML:
return std::make_unique<YAMLRemarkSerializer>(OS, Mode, std::move(StrTab));
case Format::Bitstream:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
12345678
2 changes: 2 additions & 0 deletions llvm/test/tools/llvm-remarkutil/annotation-count.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
RUN: llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark %p/Inputs/annotation-count.yaml | FileCheck %s
RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/annotation-count.yaml | FileCheck %s
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark | FileCheck %s
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil annotation-count --annotation-type=remark | FileCheck %s
RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --remark-name="AnnotationSummary" %p/Inputs/annotation-count.yaml | FileCheck %s --check-prefix=COUNT-CHECK
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil count --parser=bitstream --count-by=arg --group-by=function --remark-name="AnnotationSummary" | FileCheck %s --check-prefix=COUNT-CHECK

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
RUN: not llvm-remarkutil instruction-count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
RUN: not llvm-remarkutil instruction-mix %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
RUN: not llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
RUN: not llvm-remarkutil count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s

CHECK: error: Automatic detection of remark format failed. Unknown magic number: '1234'
5 changes: 5 additions & 0 deletions llvm/test/tools/llvm-remarkutil/empty-file.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ RUN: llvm-remarkutil instruction-count --parser=bitstream %p/Inputs/empty-file -
RUN: llvm-remarkutil instruction-mix --parser=bitstream %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM
RUN: llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM
RUN: llvm-remarkutil count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM
; Parser format auto-detection should treat empty files as bitstream files
RUN: llvm-remarkutil instruction-count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=SIZEBITSTREAM
RUN: llvm-remarkutil instruction-mix %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM
RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM
RUN: llvm-remarkutil count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM

; YAMLPARSER: error: document root is not of mapping type.

Expand Down
4 changes: 3 additions & 1 deletion llvm/test/tools/llvm-remarkutil/instruction-count.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
RUN: llvm-remarkutil instruction-count --parser=yaml %p/Inputs/instruction-count.yaml | FileCheck %s
RUN: llvm-remarkutil instruction-count %p/Inputs/instruction-count.yaml | FileCheck %s
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil instruction-count --parser=bitstream | FileCheck %s
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil instruction-count | FileCheck %s
RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --remark-name="InstructionCount" %p/Inputs/instruction-count.yaml | FileCheck %s --check-prefix=COUNT-CHECK
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil count --parser=bitstream --count-by=arg --group-by=function --remark-name="InstructionCount" | FileCheck %s --check-prefix=COUNT-CHECK
RUN: not llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --rremark-name=* %p/Inputs/instruction-count.yaml 2>&1 | FileCheck %s --check-prefix=ERROR-REPOPERATOR -DARG=rremark-name
Expand All @@ -18,4 +20,4 @@ RUN: not llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function
; COUNT-CHECK: func3,3

; ERROR-REPOPERATOR: error: invalid argument '--[[ARG]]=*': repetition-operator operand invalid
; ERROR-BOTHFILTERS: error: conflicting arguments: --remark-name and --rremark-name
; ERROR-BOTHFILTERS: error: conflicting arguments: --remark-name and --rremark-name
4 changes: 3 additions & 1 deletion llvm/test/tools/llvm-remarkutil/instruction-mix.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml | FileCheck %s
RUN: llvm-remarkutil instruction-mix %p/Inputs/instruction-mix.yaml | FileCheck %s
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix --parser=bitstream | FileCheck %s
RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix | FileCheck %s
RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=human | FileCheck %s
RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=csv | FileCheck %s --check-prefix=CSV
RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --rfilter=meow | FileCheck %s --check-prefix=MEOW-RE
Expand Down Expand Up @@ -34,4 +36,4 @@ RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix
; NONE-EXACT: ----------- -----
; NONE-NOT: {{.*}}

; ERROR: error: invalid argument '--rfilter=*': repetition-operator operand invalid
; ERROR: error: invalid argument '--rfilter=*': repetition-operator operand invalid
3 changes: 3 additions & 0 deletions llvm/test/tools/llvm-remarkutil/size-diff/no-difference.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
RUN: llvm-remarkutil size-diff %p/Inputs/1-func-1-instr-1-stack.yaml %p/Inputs/1-func-1-instr-1-stack.yaml --parser=yaml | FileCheck -strict-whitespace %s
RUN: llvm-remarkutil size-diff %p/Inputs/1-func-1-instr-1-stack.yaml %p/Inputs/1-func-1-instr-1-stack.yaml | FileCheck -strict-whitespace %s
RUN: llvm-remarkutil yaml2bitstream -o %t.bitstream %p/Inputs/1-func-1-instr-1-stack.yaml
RUN: llvm-remarkutil size-diff %t.bitstream %p/Inputs/1-func-1-instr-1-stack.yaml | FileCheck -strict-whitespace %s

; Same file passed twice -> no changes reported.

Expand Down
9 changes: 6 additions & 3 deletions llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,12 @@
// Keep Input format and names consistent across the modes via a macro.
#define INPUT_FORMAT_COMMAND_LINE_OPTIONS(SUBOPT) \
static cl::opt<Format> InputFormat( \
"parser", cl::desc("Input remark format to parse"), \
cl::values(clEnumValN(Format::YAML, "yaml", "YAML"), \
clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \
"parser", cl::init(Format::Auto), \
cl::desc("Input remark format to parse"), \
cl::values( \
clEnumValN(Format::Auto, "auto", "Automatic detection (default)"), \
clEnumValN(Format::YAML, "yaml", "YAML"), \
clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \
cl::sub(SUBOPT));

#define DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(SUBOPT) \
Expand Down
6 changes: 2 additions & 4 deletions llvm/unittests/Remarks/RemarksLinkingTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,8 @@ TEST(Remarks, LinkingError) {
// Check that the prepend path is propagated and fails with the full path.
// Also ensures that the remark format is correctly auto-detected.
RL.setExternalFilePrependPath("/baddir/");
Error E = RL.link(
StringRef("REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml",
40),
/*RemarkFormat=*/std::nullopt);
Error E = RL.link(StringRef(
"REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml", 40));
EXPECT_TRUE(static_cast<bool>(E));
std::string ErrorMessage = toString(std::move(E));
EXPECT_EQ(StringRef(ErrorMessage).lower(),
Expand Down