Skip to content

Commit 208a08c

Browse files
Reland "[lldb] Parse and display register field enums" (#97258)" (#97270)
This reverts commit d9e659c. I could not reproduce the Mac OS ASAN failure locally but I narrowed it down to the test `test_many_fields_same_enum`. This test shares an enum between x0, which is 64 bit, and cpsr, which is 32 bit. My theory is that when it does `register read x0`, an enum type is created where the undlerying enumerators are 64 bit, matching the register size. Then it does `register read cpsr` which used the cached enum type, but this register is 32 bit. This caused lldb to try to read an 8 byte value out of a 4 byte allocation: READ of size 8 at 0x60200014b874 thread T0 <...> =>0x60200014b800: fa fa fd fa fa fa fd fa fa fa fd fa fa fa[04]fa To fix this I've added the register's size in bytes to the constructed enum type's name. This means that x0 uses: __lldb_register_fields_enum_some_enum_8 And cpsr uses: __lldb_register_fields_enum_some_enum_4 If any other registers use this enum and are read, they will use the cached type as long as their size matches, otherwise we make a new type.
1 parent 8bb00cb commit 208a08c

File tree

8 files changed

+678
-22
lines changed

8 files changed

+678
-22
lines changed

lldb/include/lldb/Target/RegisterFlags.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,15 @@ class FieldEnum {
3232
: m_value(value), m_name(std::move(name)) {}
3333

3434
void ToXML(Stream &strm) const;
35+
36+
void DumpToLog(Log *log) const;
3537
};
3638

3739
typedef std::vector<Enumerator> Enumerators;
3840

41+
// GDB also includes a "size" that is the size of the underlying register.
42+
// We will not store that here but instead use the size of the register
43+
// this gets attached to when emitting XML.
3944
FieldEnum(std::string id, const Enumerators &enumerators);
4045

4146
const Enumerators &GetEnumerators() const { return m_enumerators; }
@@ -44,6 +49,8 @@ class FieldEnum {
4449

4550
void ToXML(Stream &strm, unsigned size) const;
4651

52+
void DumpToLog(Log *log) const;
53+
4754
private:
4855
std::string m_id;
4956
Enumerators m_enumerators;

lldb/source/Core/DumpRegisterInfo.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ void lldb_private::DoDumpRegisterInfo(
111111
};
112112
DumpList(strm, " In sets: ", in_sets, emit_set);
113113

114-
if (flags_type)
114+
if (flags_type) {
115115
strm.Printf("\n\n%s", flags_type->AsTable(terminal_width).c_str());
116+
117+
std::string enumerators = flags_type->DumpEnums(terminal_width);
118+
if (enumerators.size())
119+
strm << "\n\n" << enumerators;
120+
}
116121
}

lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp

Lines changed: 180 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4179,21 +4179,134 @@ struct GdbServerTargetInfo {
41794179
RegisterSetMap reg_set_map;
41804180
};
41814181

4182-
static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
4183-
unsigned size) {
4182+
static FieldEnum::Enumerators ParseEnumEvalues(const XMLNode &enum_node) {
4183+
Log *log(GetLog(GDBRLog::Process));
4184+
// We will use the last instance of each value. Also we preserve the order
4185+
// of declaration in the XML, as it may not be numerical.
4186+
// For example, hardware may intially release with two states that softwware
4187+
// can read from a register field:
4188+
// 0 = startup, 1 = running
4189+
// If in a future hardware release, the designers added a pre-startup state:
4190+
// 0 = startup, 1 = running, 2 = pre-startup
4191+
// Now it makes more sense to list them in this logical order as opposed to
4192+
// numerical order:
4193+
// 2 = pre-startup, 1 = startup, 0 = startup
4194+
// This only matters for "register info" but let's trust what the server
4195+
// chose regardless.
4196+
std::map<uint64_t, FieldEnum::Enumerator> enumerators;
4197+
4198+
enum_node.ForEachChildElementWithName(
4199+
"evalue", [&enumerators, &log](const XMLNode &enumerator_node) {
4200+
std::optional<llvm::StringRef> name;
4201+
std::optional<uint64_t> value;
4202+
4203+
enumerator_node.ForEachAttribute(
4204+
[&name, &value, &log](const llvm::StringRef &attr_name,
4205+
const llvm::StringRef &attr_value) {
4206+
if (attr_name == "name") {
4207+
if (attr_value.size())
4208+
name = attr_value;
4209+
else
4210+
LLDB_LOG(log, "ProcessGDBRemote::ParseEnumEvalues "
4211+
"Ignoring empty name in evalue");
4212+
} else if (attr_name == "value") {
4213+
uint64_t parsed_value = 0;
4214+
if (llvm::to_integer(attr_value, parsed_value))
4215+
value = parsed_value;
4216+
else
4217+
LLDB_LOG(log,
4218+
"ProcessGDBRemote::ParseEnumEvalues "
4219+
"Invalid value \"{0}\" in "
4220+
"evalue",
4221+
attr_value.data());
4222+
} else
4223+
LLDB_LOG(log,
4224+
"ProcessGDBRemote::ParseEnumEvalues Ignoring "
4225+
"unknown attribute "
4226+
"\"{0}\" in evalue",
4227+
attr_name.data());
4228+
4229+
// Keep walking attributes.
4230+
return true;
4231+
});
4232+
4233+
if (value && name)
4234+
enumerators.insert_or_assign(
4235+
*value, FieldEnum::Enumerator(*value, name->str()));
4236+
4237+
// Find all evalue elements.
4238+
return true;
4239+
});
4240+
4241+
FieldEnum::Enumerators final_enumerators;
4242+
for (auto [_, enumerator] : enumerators)
4243+
final_enumerators.push_back(enumerator);
4244+
4245+
return final_enumerators;
4246+
}
4247+
4248+
static void
4249+
ParseEnums(XMLNode feature_node,
4250+
llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
4251+
Log *log(GetLog(GDBRLog::Process));
4252+
4253+
// The top level element is "<enum...".
4254+
feature_node.ForEachChildElementWithName(
4255+
"enum", [log, &registers_enum_types](const XMLNode &enum_node) {
4256+
std::string id;
4257+
4258+
enum_node.ForEachAttribute([&id](const llvm::StringRef &attr_name,
4259+
const llvm::StringRef &attr_value) {
4260+
if (attr_name == "id")
4261+
id = attr_value;
4262+
4263+
// There is also a "size" attribute that is supposed to be the size in
4264+
// bytes of the register this applies to. However:
4265+
// * LLDB doesn't need this information.
4266+
// * It is difficult to verify because you have to wait until the
4267+
// enum is applied to a field.
4268+
//
4269+
// So we will emit this attribute in XML for GDB's sake, but will not
4270+
// bother ingesting it.
4271+
4272+
// Walk all attributes.
4273+
return true;
4274+
});
4275+
4276+
if (!id.empty()) {
4277+
FieldEnum::Enumerators enumerators = ParseEnumEvalues(enum_node);
4278+
if (!enumerators.empty()) {
4279+
LLDB_LOG(log,
4280+
"ProcessGDBRemote::ParseEnums Found enum type \"{0}\"",
4281+
id);
4282+
registers_enum_types.insert_or_assign(
4283+
id, std::make_unique<FieldEnum>(id, enumerators));
4284+
}
4285+
}
4286+
4287+
// Find all <enum> elements.
4288+
return true;
4289+
});
4290+
}
4291+
4292+
static std::vector<RegisterFlags::Field> ParseFlagsFields(
4293+
XMLNode flags_node, unsigned size,
4294+
const llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
41844295
Log *log(GetLog(GDBRLog::Process));
41854296
const unsigned max_start_bit = size * 8 - 1;
41864297

41874298
// Process the fields of this set of flags.
41884299
std::vector<RegisterFlags::Field> fields;
4189-
flags_node.ForEachChildElementWithName("field", [&fields, max_start_bit,
4190-
&log](const XMLNode
4191-
&field_node) {
4300+
flags_node.ForEachChildElementWithName("field", [&fields, max_start_bit, &log,
4301+
&registers_enum_types](
4302+
const XMLNode
4303+
&field_node) {
41924304
std::optional<llvm::StringRef> name;
41934305
std::optional<unsigned> start;
41944306
std::optional<unsigned> end;
4307+
std::optional<llvm::StringRef> type;
41954308

4196-
field_node.ForEachAttribute([&name, &start, &end, max_start_bit,
4309+
field_node.ForEachAttribute([&name, &start, &end, &type, max_start_bit,
41974310
&log](const llvm::StringRef &attr_name,
41984311
const llvm::StringRef &attr_value) {
41994312
// Note that XML in general requires that each of these attributes only
@@ -4240,8 +4353,7 @@ static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
42404353
attr_value.data());
42414354
}
42424355
} else if (attr_name == "type") {
4243-
// Type is a known attribute but we do not currently use it and it is
4244-
// not required.
4356+
type = attr_value;
42454357
} else {
42464358
LLDB_LOG(
42474359
log,
@@ -4254,14 +4366,55 @@ static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
42544366
});
42554367

42564368
if (name && start && end) {
4257-
if (*start > *end) {
4369+
if (*start > *end)
42584370
LLDB_LOG(
42594371
log,
42604372
"ProcessGDBRemote::ParseFlagsFields Start {0} > end {1} in field "
42614373
"\"{2}\", ignoring",
42624374
*start, *end, name->data());
4263-
} else {
4264-
fields.push_back(RegisterFlags::Field(name->str(), *start, *end));
4375+
else {
4376+
if (RegisterFlags::Field::GetSizeInBits(*start, *end) > 64)
4377+
LLDB_LOG(log,
4378+
"ProcessGDBRemote::ParseFlagsFields Ignoring field \"{2}\" "
4379+
"that has "
4380+
"size > 64 bits, this is not supported",
4381+
name->data());
4382+
else {
4383+
// A field's type may be set to the name of an enum type.
4384+
const FieldEnum *enum_type = nullptr;
4385+
if (type && !type->empty()) {
4386+
auto found = registers_enum_types.find(*type);
4387+
if (found != registers_enum_types.end()) {
4388+
enum_type = found->second.get();
4389+
4390+
// No enumerator can exceed the range of the field itself.
4391+
uint64_t max_value =
4392+
RegisterFlags::Field::GetMaxValue(*start, *end);
4393+
for (const auto &enumerator : enum_type->GetEnumerators()) {
4394+
if (enumerator.m_value > max_value) {
4395+
enum_type = nullptr;
4396+
LLDB_LOG(
4397+
log,
4398+
"ProcessGDBRemote::ParseFlagsFields In enum \"{0}\" "
4399+
"evalue \"{1}\" with value {2} exceeds the maximum value "
4400+
"of field \"{3}\" ({4}), ignoring enum",
4401+
type->data(), enumerator.m_name, enumerator.m_value,
4402+
name->data(), max_value);
4403+
break;
4404+
}
4405+
}
4406+
} else {
4407+
LLDB_LOG(log,
4408+
"ProcessGDBRemote::ParseFlagsFields Could not find type "
4409+
"\"{0}\" "
4410+
"for field \"{1}\", ignoring",
4411+
type->data(), name->data());
4412+
}
4413+
}
4414+
4415+
fields.push_back(
4416+
RegisterFlags::Field(name->str(), *start, *end, enum_type));
4417+
}
42654418
}
42664419
}
42674420

@@ -4272,12 +4425,14 @@ static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
42724425

42734426
void ParseFlags(
42744427
XMLNode feature_node,
4275-
llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types) {
4428+
llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types,
4429+
const llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
42764430
Log *log(GetLog(GDBRLog::Process));
42774431

42784432
feature_node.ForEachChildElementWithName(
42794433
"flags",
4280-
[&log, &registers_flags_types](const XMLNode &flags_node) -> bool {
4434+
[&log, &registers_flags_types,
4435+
&registers_enum_types](const XMLNode &flags_node) -> bool {
42814436
LLDB_LOG(log, "ProcessGDBRemote::ParseFlags Found flags node \"{0}\"",
42824437
flags_node.GetAttributeValue("id").c_str());
42834438

@@ -4310,7 +4465,7 @@ void ParseFlags(
43104465
if (id && size) {
43114466
// Process the fields of this set of flags.
43124467
std::vector<RegisterFlags::Field> fields =
4313-
ParseFlagsFields(flags_node, *size);
4468+
ParseFlagsFields(flags_node, *size, registers_enum_types);
43144469
if (fields.size()) {
43154470
// Sort so that the fields with the MSBs are first.
43164471
std::sort(fields.rbegin(), fields.rend());
@@ -4375,13 +4530,19 @@ void ParseFlags(
43754530
bool ParseRegisters(
43764531
XMLNode feature_node, GdbServerTargetInfo &target_info,
43774532
std::vector<DynamicRegisterInfo::Register> &registers,
4378-
llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types) {
4533+
llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types,
4534+
llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
43794535
if (!feature_node)
43804536
return false;
43814537

43824538
Log *log(GetLog(GDBRLog::Process));
43834539

4384-
ParseFlags(feature_node, registers_flags_types);
4540+
// Enums first because they are referenced by fields in the flags.
4541+
ParseEnums(feature_node, registers_enum_types);
4542+
for (const auto &enum_type : registers_enum_types)
4543+
enum_type.second->DumpToLog(log);
4544+
4545+
ParseFlags(feature_node, registers_flags_types, registers_enum_types);
43854546
for (const auto &flags : registers_flags_types)
43864547
flags.second->DumpToLog(log);
43874548

@@ -4643,7 +4804,7 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
46434804
if (arch_to_use.IsValid()) {
46444805
for (auto &feature_node : feature_nodes) {
46454806
ParseRegisters(feature_node, target_info, registers,
4646-
m_registers_flags_types);
4807+
m_registers_flags_types, m_registers_enum_types);
46474808
}
46484809

46494810
for (const auto &include : target_info.includes) {
@@ -4708,13 +4869,14 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfo(ArchSpec &arch_to_use) {
47084869
if (!m_gdb_comm.GetQXferFeaturesReadSupported())
47094870
return false;
47104871

4711-
// This holds register flags information for the whole of target.xml.
4872+
// These hold register type information for the whole of target.xml.
47124873
// target.xml may include further documents that
47134874
// GetGDBServerRegisterInfoXMLAndProcess will recurse to fetch and process.
47144875
// That's why we clear the cache here, and not in
47154876
// GetGDBServerRegisterInfoXMLAndProcess. To prevent it being cleared on every
47164877
// include read.
47174878
m_registers_flags_types.clear();
4879+
m_registers_enum_types.clear();
47184880
std::vector<DynamicRegisterInfo::Register> registers;
47194881
if (GetGDBServerRegisterInfoXMLAndProcess(arch_to_use, "target.xml",
47204882
registers) &&

lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,11 @@ class ProcessGDBRemote : public Process,
484484
// entries are added. Which would invalidate any pointers set in the register
485485
// info up to that point.
486486
llvm::StringMap<std::unique_ptr<RegisterFlags>> m_registers_flags_types;
487+
488+
// Enum types are referenced by register fields. This does not store the data
489+
// directly because the map may reallocate. Pointers to these are contained
490+
// within instances of RegisterFlags.
491+
llvm::StringMap<std::unique_ptr<FieldEnum>> m_registers_enum_types;
487492
};
488493

489494
} // namespace process_gdb_remote

lldb/source/Plugins/RegisterTypeBuilder/RegisterTypeBuilderClang.cpp

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ CompilerType RegisterTypeBuilderClang::GetRegisterType(
4343
ScratchTypeSystemClang::GetForTarget(m_target);
4444
assert(type_system);
4545

46-
std::string register_type_name = "__lldb_register_fields_";
47-
register_type_name += name;
46+
std::string register_type_name = "__lldb_register_fields_" + name;
4847
// See if we have made this type before and can reuse it.
4948
CompilerType fields_type =
5049
type_system->GetTypeForIdentifier<clang::CXXRecordDecl>(
@@ -67,8 +66,48 @@ CompilerType RegisterTypeBuilderClang::GetRegisterType(
6766
// We assume that RegisterFlags has padded and sorted the fields
6867
// already.
6968
for (const RegisterFlags::Field &field : flags.GetFields()) {
69+
CompilerType field_type = field_uint_type;
70+
71+
if (const FieldEnum *enum_type = field.GetEnum()) {
72+
const FieldEnum::Enumerators &enumerators = enum_type->GetEnumerators();
73+
if (!enumerators.empty()) {
74+
// Enums can be used by many registers and the size of each register
75+
// may be different. The register size is used as the underlying size
76+
// of the enumerators, so we must make one enum type per register size
77+
// it is used with.
78+
std::string enum_type_name = "__lldb_register_fields_enum_" +
79+
enum_type->GetID() + "_" +
80+
std::to_string(byte_size);
81+
82+
// Enums can be used by mutiple fields and multiple registers, so we
83+
// may have built this one already.
84+
CompilerType field_enum_type =
85+
type_system->GetTypeForIdentifier<clang::EnumDecl>(
86+
enum_type_name);
87+
88+
if (field_enum_type)
89+
field_type = field_enum_type;
90+
else {
91+
field_type = type_system->CreateEnumerationType(
92+
enum_type_name, type_system->GetTranslationUnitDecl(),
93+
OptionalClangModuleID(), Declaration(), field_uint_type, false);
94+
95+
type_system->StartTagDeclarationDefinition(field_type);
96+
97+
Declaration decl;
98+
for (auto enumerator : enumerators) {
99+
type_system->AddEnumerationValueToEnumerationType(
100+
field_type, decl, enumerator.m_name.c_str(),
101+
enumerator.m_value, byte_size * 8);
102+
}
103+
104+
type_system->CompleteTagDeclarationDefinition(field_type);
105+
}
106+
}
107+
}
108+
70109
type_system->AddFieldToRecordType(fields_type, field.GetName(),
71-
field_uint_type, lldb::eAccessPublic,
110+
field_type, lldb::eAccessPublic,
72111
field.GetSizeInBits());
73112
}
74113

0 commit comments

Comments
 (0)