Skip to content

[lldb] Implement ANSI & Unicode aware string stripping & padding #130878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 78 additions & 14 deletions lldb/include/lldb/Utility/AnsiTerminal.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@
#define ANSI_1_CTRL(ctrl1) "\033["##ctrl1 ANSI_ESC_END
#define ANSI_2_CTRL(ctrl1, ctrl2) "\033["##ctrl1 ";"##ctrl2 ANSI_ESC_END

// Character count of the "\033[" introducer that the ANSI_*_CTRL macros above
// begin with. NOTE(review): presumably this is meant to equal the length of
// ANSI_ESC_START; confirm against that macro's definition.
#define ANSI_ESC_START_LEN 2

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Locale.h"

#include <string>

Expand Down Expand Up @@ -172,28 +175,89 @@ inline std::string FormatAnsiTerminalCodes(llvm::StringRef format,
return fmt;
}

/// Locate the first complete ANSI escape sequence in \p str.
///
/// A complete sequence is ANSI_ESC_START, followed by zero or more parameter
/// characters from "0123456789;", followed by a final 'm' or 'G'. An
/// ANSI_ESC_START that is not followed by such a tail is treated as ordinary
/// text and the search continues after it.
///
/// \param str
///     The text to search. The returned pieces are views into this buffer.
///
/// \return
///     A tuple {left, escape, right} such that str == left + escape + right,
///     where \c escape is the first complete sequence. If \p str contains no
///     complete sequence the result is {str, {}, {}}.
inline std::tuple<llvm::StringRef, llvm::StringRef, llvm::StringRef>
FindNextAnsiSequence(llvm::StringRef str) {
  size_t search_from = 0;

  while (true) {
    const size_t start = str.find(ANSI_ESC_START, search_from);

    // ANSI_ESC_START not found.
    if (start == llvm::StringRef::npos)
      return {str, {}, {}};

    // The parameters begin immediately after the introducer. No character is
    // skipped unchecked, so a parameterless sequence such as "\x1B[m" is
    // recognised and "\x1B[" followed by an arbitrary character is not.
    const size_t params_begin = start + ANSI_ESC_START_LEN;
    const size_t end = str.find_first_not_of("0123456789;", params_begin);

    // ANSI_ESC_END found.
    if (end != llvm::StringRef::npos && (str[end] == 'm' || str[end] == 'G'))
      return {str.take_front(start), str.slice(start, end + 1),
              str.substr(end + 1)};

    // Not a valid sequence. Resume right after this introducer so that a valid
    // sequence starting at the very next character is still found.
    search_from = params_begin;
  }
}

/// Return a copy of \p str with every complete ANSI escape sequence removed.
///
/// Sequences are located with FindNextAnsiSequence; whatever it does not
/// report as an escape sequence is copied to the result unchanged.
inline std::string StripAnsiTerminalCodes(llvm::StringRef str) {
  std::string stripped;
  // The result can never be longer than the input.
  stripped.reserve(str.size());

  while (!str.empty()) {
    auto [left, escape, right] = FindNextAnsiSequence(str);
    // Keep the text in front of the sequence, drop the sequence itself and
    // carry on with the remainder. When no sequence is left, `left` is the
    // whole remaining string and `right` is empty, which ends the loop.
    stripped += left;
    str = right;
  }
  return stripped;
}

/// Trim and/or pad \p str so that it occupies \p visible_length terminal
/// columns.
///
/// ANSI escape sequences (as found by FindNextAnsiSequence) do not count
/// towards the width. Text is measured with llvm::sys::locale::columnWidth and
/// is only ever cut at a point where the kept prefix measures as valid,
/// printable text. Once text has been cut, nothing that follows it in \p str
/// is emitted.
///
/// \param str
///     The text to fit, possibly containing ANSI escape sequences.
/// \param visible_length
///     The number of columns the visible part of the result should occupy.
/// \param padding
///     The character appended when the visible text is too short.
///
/// \return
///     The trimmed and padded string.
inline std::string TrimAndPad(llvm::StringRef str, size_t visible_length,
                              char padding = ' ') {
  std::string result;
  result.reserve(visible_length);
  size_t result_visible_length = 0;

  // columnWidth reports text it cannot measure (e.g. invalid or partial
  // unicode) as a negative value. Check the sign before mixing it with
  // unsigned arithmetic: converting first would let the sum wrap around and
  // pass the comparison once some width has already been accumulated.
  // Invariant: result_visible_length <= visible_length.
  const auto fits = [&](int width) {
    return width >= 0 && static_cast<size_t>(width) <=
                             visible_length - result_visible_length;
  };

  // Trim the string to the given visible length.
  while (!str.empty()) {
    auto [left, escape, right] = FindNextAnsiSequence(str);
    str = right;

    // Compute the width of the text in front of the escape code. If it fits,
    // append it together with the invisible escape code.
    const int width = llvm::sys::locale::columnWidth(left);
    if (fits(width)) {
      result.append(left).append(escape);
      result_visible_length += width;
      continue;
    }

    // The text might contain unicode, which means it is not safe to truncate
    // at an arbitrary byte. Drop one trailing byte at a time until what is
    // left is valid and fits.
    for (llvm::StringRef trimmed = left.drop_back(); !trimmed.empty();
         trimmed = trimmed.drop_back()) {
      const int trimmed_width = llvm::sys::locale::columnWidth(trimmed);
      if (fits(trimmed_width)) {
        result.append(trimmed);
        result_visible_length += trimmed_width;
        break;
      }
    }

    // Text has been cut here. Stop, so that later text which happens to fit
    // is not glued on after the cut.
    break;
  }

  // Pad the string.
  if (result_visible_length < visible_length)
    result.append(visible_length - result_visible_length, padding);

  return result;
}

} // namespace ansi
Expand Down
49 changes: 49 additions & 0 deletions lldb/unittests/Utility/AnsiTerminalTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,52 @@ TEST(AnsiTerminal, InvalidEscapeCode) {
EXPECT_EQ("abc\x1B[31kabcabc",
ansi::StripAnsiTerminalCodes("abc\x1B[31kabc\x1B[0mabc"));
}

TEST(AnsiTerminal, FindNextAnsiSequenceBasic) {
  // One well-formed sequence with plain text on either side of it.
  const auto parts = ansi::FindNextAnsiSequence("foo\x1B[31mbar");
  EXPECT_EQ("foo", std::get<0>(parts));
  EXPECT_EQ("\x1B[31m", std::get<1>(parts));
  EXPECT_EQ("bar", std::get<2>(parts));
}

TEST(AnsiTerminal, FindNextAnsiSequenceIncompleteStart) {
  // The first introducer is not followed by a valid tail, so it stays part of
  // the leading text and the second, complete sequence is the one reported.
  const auto parts = ansi::FindNextAnsiSequence("foo\x1B[bar\x1B[31mbaz");
  EXPECT_EQ("foo\x1B[bar", std::get<0>(parts));
  EXPECT_EQ("\x1B[31m", std::get<1>(parts));
  EXPECT_EQ("baz", std::get<2>(parts));
}

TEST(AnsiTerminal, FindNextAnsiSequenceEscapeStart) {
  // A sequence at the very beginning leaves nothing in front of it.
  const auto parts = ansi::FindNextAnsiSequence("\x1B[31mfoo");
  EXPECT_EQ("", std::get<0>(parts));
  EXPECT_EQ("\x1B[31m", std::get<1>(parts));
  EXPECT_EQ("foo", std::get<2>(parts));
}

TEST(AnsiTerminal, TrimAndPad) {
  // Every call asks for a visible width of 5, so each expected value must
  // occupy exactly 5 columns once escape codes are discounted. The number of
  // padding spaces is spelled out because it is easy to get wrong by eye.

  // Test basic ASCII.
  EXPECT_EQ("     ", ansi::TrimAndPad("", 5));      // 5 spaces.
  EXPECT_EQ("foo  ", ansi::TrimAndPad("foo", 5));   // 2 spaces.
  EXPECT_EQ("fooba", ansi::TrimAndPad("fooba", 5));
  EXPECT_EQ("fooba", ansi::TrimAndPad("foobar", 5));

  // Simple test that ANSI escape codes don't contribute to the visible width.
  EXPECT_EQ("\x1B[30m     ", ansi::TrimAndPad("\x1B[30m", 5));     // 5 spaces.
  EXPECT_EQ("\x1B[30mfoo  ", ansi::TrimAndPad("\x1B[30mfoo", 5));  // 2 spaces.
  EXPECT_EQ("\x1B[30mfooba", ansi::TrimAndPad("\x1B[30mfooba", 5));
  EXPECT_EQ("\x1B[30mfooba", ansi::TrimAndPad("\x1B[30mfoobar", 5));

  // Test that we include as many escape codes as we can.
  EXPECT_EQ("fooba\x1B[30m", ansi::TrimAndPad("fooba\x1B[30m", 5));
  EXPECT_EQ("fooba\x1B[30m\x1B[34m",
            ansi::TrimAndPad("fooba\x1B[30m\x1B[34m", 5));
  EXPECT_EQ("fooba\x1B[30m\x1B[34m",
            ansi::TrimAndPad("fooba\x1B[30m\x1B[34mr", 5));

  // Test Unicode. The heart occupies a single column (the cases below that
  // need no padding only add up to 5 on that basis).
  EXPECT_EQ("❤️    ", ansi::TrimAndPad("❤️", 5));      // 4 trailing spaces.
  EXPECT_EQ("    ❤️", ansi::TrimAndPad("    ❤️", 5));  // 4 leading spaces.
  EXPECT_EQ("12❤️4❤️", ansi::TrimAndPad("12❤️4❤️", 5));
  EXPECT_EQ("12❤️45", ansi::TrimAndPad("12❤️45❤️", 5));
}