Skip to content

Commit ca0ce99

Browse files
committed
[lldb] Print embedded nuls in char arrays (PR44649)
When we know the bounds of the array, print any embedded nuls instead of treating them as terminators. An exception to this rule is made for the nul character at the very end of the string. We don't print that, as otherwise 99% of the strings would end in \0. This way the strings usually come out the same as the user typed them into the compiler (char foo[] = "with\0nuls"). It also matches how they come out in gdb. This resolves a FIXME left from D111399, and leaves another FIXME for dealing with nul characters in "escape-non-printables=false" mode. In this mode, the nul characters cause the entire summary string to be terminated prematurely. Differential Revision: https://reviews.llvm.org/D111634
1 parent 6e1308b commit ca0ce99

File tree

4 files changed

+19
-13
lines changed

4 files changed

+19
-13
lines changed

lldb/source/Core/ValueObject.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -849,8 +849,10 @@ bool ValueObject::SetData(DataExtractor &data, Status &error) {
849849

850850
static bool CopyStringDataToBufferSP(const StreamString &source,
851851
lldb::DataBufferSP &destination) {
852-
destination = std::make_shared<DataBufferHeap>(source.GetSize() + 1, 0);
853-
memcpy(destination->GetBytes(), source.GetString().data(), source.GetSize());
852+
llvm::StringRef src = source.GetString();
853+
src.consume_back(llvm::StringRef("\0", 1));
854+
destination = std::make_shared<DataBufferHeap>(src.size(), 0);
855+
memcpy(destination->GetBytes(), src.data(), src.size());
854856
return true;
855857
}
856858

@@ -912,8 +914,8 @@ ValueObject::ReadPointedString(lldb::DataBufferSP &buffer_sp, Status &error,
912914
CopyStringDataToBufferSP(s, buffer_sp);
913915
return {0, was_capped};
914916
}
915-
buffer_sp = std::make_shared<DataBufferHeap>(cstr_len, 0);
916-
memcpy(buffer_sp->GetBytes(), cstr, cstr_len);
917+
s << llvm::StringRef(cstr, cstr_len);
918+
CopyStringDataToBufferSP(s, buffer_sp);
917919
return {cstr_len, was_capped};
918920
} else {
919921
s << "<invalid address>";
@@ -1196,6 +1198,7 @@ bool ValueObject::DumpPrintableRepresentation(
11961198
options.SetQuote('"');
11971199
options.SetSourceSize(buffer_sp->GetByteSize());
11981200
options.SetIsTruncated(read_string.second);
1201+
options.SetBinaryZeroIsTerminator(custom_format != eFormatVectorOfChar);
11991202
formatters::StringPrinter::ReadBufferAndDumpToStream<
12001203
lldb_private::formatters::StringPrinter::StringElementType::ASCII>(
12011204
options);

lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ def test(self):
9090

9191
# Different character arrays.
9292
# FIXME: Passing a 'const char *' will ignore any given format,
93-
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("character array", "cstring"))
94-
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("c-string", "cstring"))
93+
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("character array", "cstring"))
94+
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("c-string", "cstring"))
9595
self.assertIn(' = " \\e\\a\\b\\f\\n\\r\\t\\vaA09" " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n',
9696
self.getFormatted("c-string", "(char *)cstring"))
9797
self.assertIn('=\n', self.getFormatted("c-string", "(__UINT64_TYPE__)0"))
@@ -132,10 +132,10 @@ def test(self):
132132
self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("OSType", string_expr))
133133

134134
# bytes
135-
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes", "cstring"))
135+
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes", "cstring"))
136136

137137
# bytes with ASCII
138-
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes with ASCII", "cstring"))
138+
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring"))
139139

140140
# unicode16
141141
self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678"))

lldb/test/API/functionalities/data-formatter/stringprinter/main.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ struct A {
88

99
int main (int argc, char const *argv[])
1010
{
11-
A a, b;
11+
A a, b, c;
1212
// Deliberately write past the end of data to test that the formatter stops
1313
// at the end of array.
1414
memcpy(a.data, "FOOBAR", 7);
1515
memcpy(b.data, "FO\0BAR", 7);
16+
memcpy(c.data, "F\0O\0AR", 7);
1617
std::string stdstring("Hello\t\tWorld\nI am here\t\tto say hello\n"); //%self.addTearDownHook(lambda x: x.runCmd("setting set escape-non-printables true"))
1718
const char* constcharstar = stdstring.c_str();
1819
std::string longstring(
@@ -33,13 +34,15 @@ int main (int argc, char const *argv[])
3334
return 0; //% if self.TraceOn(): self.runCmd('frame variable')
3435
//% self.expect_var_path('stdstring', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
3536
//% self.expect_var_path('constcharstar', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
37+
//% self.expect_var_path("a.data", summary='"FOOB"')
38+
//% self.expect_var_path("b.data", summary=r'"FO\0B"')
39+
//% self.expect_var_path("c.data", summary=r'"F\0O"')
40+
//%
3641
//% self.runCmd("setting set escape-non-printables false")
3742
//% self.expect_var_path('stdstring', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
3843
//% self.expect_var_path('constcharstar', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
3944
//% self.assertTrue(self.frame().FindVariable('longstring').GetSummary().endswith('"...'))
4045
//% self.assertTrue(self.frame().FindVariable('longconstcharstar').GetSummary().endswith('"...'))
41-
//% self.expect_var_path("a.data", summary='"FOOB"')
42-
// FIXME: Should this be "FO\0B" instead?
43-
//% self.expect_var_path("b.data", summary='"FO"')
46+
// FIXME: make "b.data" and "c.data" work sanely
4447
}
4548

lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
## Variables specified using string forms. This behavior is purely speculative -- I
1818
## don't know of any compiler that would represent character strings this way.
1919
# CHECK: (char [7]) string = "string"
20-
# CHECK: (char [7]) strp = "strp"
20+
# CHECK: (char [7]) strp = "strp\0\0"
2121
## Bogus attribute form. Let's make sure we don't crash at least.
2222
# CHECK: (char [7]) ref4 = <empty constant data>
2323
## A variable of pointer type.

0 commit comments

Comments
 (0)