Skip to content

Commit 54981bb

Browse files
committed
[lldb] Read register fields from target XML
This teaches ProcessGDBRemote to look for "flags" nodes in the target XML that tell you what fields a register has. https://sourceware.org/gdb/onlinedocs/gdb/Target-Description-Format.html It will check for various invalid inputs like: * Flags nodes with 0 fields in them. * Start or end being > the register size. * Fields that overlap. * Required properties not being present (e.g. no name). * Flag sets being redefined. If anything untoward is found, we'll just drop the field or the flag set altogether. Register fields are a "nice to have" so LLDB shouldn't be crashing because of them, instead just log anything we throw away. So the user can fix their XML/file a bug with their vendor. Once that is done it will sort the fields and pass them to the RegisterFields class I added previously. There is no way to see these fields yet, so tests for this code will come later when the formatting code is added. The fields are stored in a map of unique pointers on the ProcessGDBRemote class. It will give out raw pointers on the assumption that the GDB process lives longer than the users of those pointers do. Which means RegisterInfo is still a trivial struct but we are properly destroying the fields when the GDB process ends. We can't store the fields directly in the map because adding new items may cause its storage to be reallocated, which would invalidate pointers we've already given out. Reviewed By: jasonmolenda, JDevlieghere Differential Revision: https://reviews.llvm.org/D145574
1 parent cc6ab26 commit 54981bb

File tree

5 files changed

+263
-31
lines changed

5 files changed

+263
-31
lines changed

lldb/include/lldb/Target/DynamicRegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <map>
1313
#include <vector>
1414

15+
#include "lldb/Target/RegisterFlags.h"
1516
#include "lldb/Utility/ConstString.h"
1617
#include "lldb/Utility/StructuredData.h"
1718
#include "lldb/lldb-private.h"
@@ -39,6 +40,8 @@ class DynamicRegisterInfo {
3940
std::vector<uint32_t> value_regs;
4041
std::vector<uint32_t> invalidate_regs;
4142
uint32_t value_reg_offset = 0;
43+
// Non-null if there is an XML provided type.
44+
const RegisterFlags *flags_type = nullptr;
4245
};
4346

4447
DynamicRegisterInfo() = default;

lldb/include/lldb/lldb-private-types.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#if defined(__cplusplus)
1313

14+
#include "lldb/Target/RegisterFlags.h"
1415
#include "lldb/lldb-private.h"
1516

1617
#include "llvm/ADT/ArrayRef.h"
@@ -62,8 +63,8 @@ struct RegisterInfo {
6263
/// this register changes. For example, the invalidate list for eax would be
6364
/// rax ax, ah, and al.
6465
uint32_t *invalidate_regs;
65-
// Will be replaced with register flags info in the next patch.
66-
void *unused;
66+
/// If not nullptr, a type defined by XML descriptions.
67+
const RegisterFlags *flags_type;
6768

6869
llvm::ArrayRef<uint8_t> data(const uint8_t *context_base) const {
6970
return llvm::ArrayRef<uint8_t>(context_base + byte_offset, byte_size);

lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp

Lines changed: 246 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "lldb/Target/ABI.h"
5454
#include "lldb/Target/DynamicLoader.h"
5555
#include "lldb/Target/MemoryRegionInfo.h"
56+
#include "lldb/Target/RegisterFlags.h"
5657
#include "lldb/Target/SystemRuntime.h"
5758
#include "lldb/Target/Target.h"
5859
#include "lldb/Target/TargetList.h"
@@ -84,6 +85,7 @@
8485
#include "lldb/Utility/StringExtractorGDBRemote.h"
8586

8687
#include "llvm/ADT/ScopeExit.h"
88+
#include "llvm/ADT/StringMap.h"
8789
#include "llvm/ADT/StringSwitch.h"
8890
#include "llvm/Support/FormatAdapters.h"
8991
#include "llvm/Support/Threading.h"
@@ -4059,15 +4061,213 @@ struct GdbServerTargetInfo {
40594061
RegisterSetMap reg_set_map;
40604062
};
40614063

4062-
bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
4063-
std::vector<DynamicRegisterInfo::Register> &registers) {
4064+
static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
4065+
unsigned size) {
4066+
Log *log(GetLog(GDBRLog::Process));
4067+
const unsigned max_start_bit = size * 8 - 1;
4068+
4069+
// Process the fields of this set of flags.
4070+
std::vector<RegisterFlags::Field> fields;
4071+
flags_node.ForEachChildElementWithName("field", [&fields, max_start_bit,
4072+
&log](const XMLNode
4073+
&field_node) {
4074+
std::optional<llvm::StringRef> name;
4075+
std::optional<unsigned> start;
4076+
std::optional<unsigned> end;
4077+
4078+
field_node.ForEachAttribute([&name, &start, &end, max_start_bit,
4079+
&log](const llvm::StringRef &attr_name,
4080+
const llvm::StringRef &attr_value) {
4081+
// Note that XML in general requires that each of these attributes only
4082+
// appears once, so we don't have to handle that here.
4083+
if (attr_name == "name") {
4084+
LLDB_LOG(log,
4085+
"ProcessGDBRemote::ParseFlags Found field node name \"{0}\"",
4086+
attr_value.data());
4087+
name = attr_value;
4088+
} else if (attr_name == "start") {
4089+
unsigned parsed_start = 0;
4090+
if (llvm::to_integer(attr_value, parsed_start)) {
4091+
if (parsed_start > max_start_bit) {
4092+
LLDB_LOG(
4093+
log,
4094+
"ProcessGDBRemote::ParseFlags Invalid start {0} in field node, "
4095+
"cannot be > {1}",
4096+
parsed_start, max_start_bit);
4097+
} else
4098+
start = parsed_start;
4099+
} else {
4100+
LLDB_LOG(log,
4101+
"ProcessGDBRemote::ParseFlags Invalid start \"{0}\" in "
4102+
"field node",
4103+
attr_value.data());
4104+
}
4105+
} else if (attr_name == "end") {
4106+
unsigned parsed_end = 0;
4107+
if (llvm::to_integer(attr_value, parsed_end))
4108+
if (parsed_end > max_start_bit) {
4109+
LLDB_LOG(
4110+
log,
4111+
"ProcessGDBRemote::ParseFlags Invalid end {0} in field node, "
4112+
"cannot be > {1}",
4113+
parsed_end, max_start_bit);
4114+
} else
4115+
end = parsed_end;
4116+
else {
4117+
LLDB_LOG(
4118+
log,
4119+
"ProcessGDBRemote::ParseFlags Invalid end \"{0}\" in field node",
4120+
attr_value.data());
4121+
}
4122+
} else if (attr_name == "type") {
4123+
// Type is a known attribute but we do not currently use it and it is
4124+
// not required.
4125+
} else {
4126+
LLDB_LOG(log,
4127+
"ProcessGDBRemote::ParseFlags Ignoring unknown attribute "
4128+
"\"{0}\" in field node",
4129+
attr_name.data());
4130+
}
4131+
4132+
return true; // Walk all attributes of the field.
4133+
});
4134+
4135+
if (name && start && end) {
4136+
if (*start > *end) {
4137+
LLDB_LOG(log,
4138+
"ProcessGDBRemote::ParseFlags Start {0} > end {1} in field "
4139+
"\"{2}\", ignoring",
4140+
*start, *end, name->data());
4141+
} else {
4142+
fields.push_back(RegisterFlags::Field(name->str(), *start, *end));
4143+
}
4144+
}
4145+
4146+
return true; // Iterate all "field" nodes.
4147+
});
4148+
return fields;
4149+
}
4150+
4151+
void ParseFlags(
4152+
XMLNode feature_node,
4153+
llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types) {
4154+
Log *log(GetLog(GDBRLog::Process));
4155+
4156+
feature_node.ForEachChildElementWithName(
4157+
"flags",
4158+
[&log, &registers_flags_types](const XMLNode &flags_node) -> bool {
4159+
LLDB_LOG(log, "ProcessGDBRemote::ParseFlags Found flags node \"{0}\"",
4160+
flags_node.GetAttributeValue("id").c_str());
4161+
4162+
std::optional<llvm::StringRef> id;
4163+
std::optional<unsigned> size;
4164+
flags_node.ForEachAttribute(
4165+
[&id, &size, &log](const llvm::StringRef &name,
4166+
const llvm::StringRef &value) {
4167+
if (name == "id") {
4168+
id = value;
4169+
} else if (name == "size") {
4170+
unsigned parsed_size = 0;
4171+
if (llvm::to_integer(value, parsed_size))
4172+
size = parsed_size;
4173+
else {
4174+
LLDB_LOG(log,
4175+
"ProcessGDBRemote::ParseFlags Invalid size \"{0}\" "
4176+
"in flags node",
4177+
value.data());
4178+
}
4179+
} else {
4180+
LLDB_LOG(log,
4181+
"ProcessGDBRemote::ParseFlags Ignoring unknown "
4182+
"attribute \"{0}\" in flags node",
4183+
name.data());
4184+
}
4185+
return true; // Walk all attributes.
4186+
});
4187+
4188+
if (id && size) {
4189+
// Process the fields of this set of flags.
4190+
std::vector<RegisterFlags::Field> fields =
4191+
ParseFlagsFields(flags_node, *size);
4192+
if (fields.size()) {
4193+
// Sort so that the fields with the MSBs are first.
4194+
std::sort(fields.rbegin(), fields.rend());
4195+
std::vector<RegisterFlags::Field>::const_iterator overlap =
4196+
std::adjacent_find(fields.begin(), fields.end(),
4197+
[](const RegisterFlags::Field &lhs,
4198+
const RegisterFlags::Field &rhs) {
4199+
return lhs.Overlaps(rhs);
4200+
});
4201+
4202+
// If no fields overlap, use them.
4203+
if (overlap == fields.end()) {
4204+
if (registers_flags_types.find(*id) !=
4205+
registers_flags_types.end()) {
4206+
// In theory you could define some flag set, use it with a
4207+
// register then redefine it. We do not know if anyone does
4208+
// that, or what they would expect to happen in that case.
4209+
//
4210+
// LLDB chooses to take the first definition and ignore the rest
4211+
// as waiting until everything has been processed is more
4212+
// expensive and difficult. This means that pointers to flag
4213+
// sets in the register info remain valid if later the flag set
4214+
// is redefined. If we allowed redefinitions, LLDB would crash
4215+
// when you tried to print a register that used the original
4216+
// definition.
4217+
LLDB_LOG(
4218+
log,
4219+
"ProcessGDBRemote::ParseFlags Definition of flags "
4220+
"\"{0}\" shadows "
4221+
"previous definition, using original definition instead.",
4222+
id->data());
4223+
} else {
4224+
registers_flags_types.insert_or_assign(
4225+
*id, std::make_unique<RegisterFlags>(id->str(), *size,
4226+
std::move(fields)));
4227+
}
4228+
} else {
4229+
// If any fields overlap, ignore the whole set of flags.
4230+
std::vector<RegisterFlags::Field>::const_iterator next =
4231+
std::next(overlap);
4232+
LLDB_LOG(
4233+
log,
4234+
"ProcessGDBRemote::ParseFlags Ignoring flags because fields "
4235+
"{0} (start: {1} end: {2}) and {3} (start: {4} end: {5}) "
4236+
"overlap.",
4237+
overlap->GetName().c_str(), overlap->GetStart(),
4238+
overlap->GetEnd(), next->GetName().c_str(), next->GetStart(),
4239+
next->GetEnd());
4240+
}
4241+
} else {
4242+
LLDB_LOG(
4243+
log,
4244+
"ProcessGDBRemote::ParseFlags Ignoring definition of flags "
4245+
"\"{0}\" because it contains no fields.",
4246+
id->data());
4247+
}
4248+
}
4249+
4250+
return true; // Keep iterating through all "flags" elements.
4251+
});
4252+
}
4253+
4254+
bool ParseRegisters(
4255+
XMLNode feature_node, GdbServerTargetInfo &target_info,
4256+
std::vector<DynamicRegisterInfo::Register> &registers,
4257+
llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types) {
40644258
if (!feature_node)
40654259
return false;
40664260

40674261
Log *log(GetLog(GDBRLog::Process));
40684262

4263+
ParseFlags(feature_node, registers_flags_types);
4264+
for (const auto &flags : registers_flags_types)
4265+
flags.second->log(log);
4266+
40694267
feature_node.ForEachChildElementWithName(
4070-
"reg", [&target_info, &registers, log](const XMLNode &reg_node) -> bool {
4268+
"reg",
4269+
[&target_info, &registers, &registers_flags_types,
4270+
log](const XMLNode &reg_node) -> bool {
40714271
std::string gdb_group;
40724272
std::string gdb_type;
40734273
DynamicRegisterInfo::Register reg_info;
@@ -4143,29 +4343,40 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
41434343
return true; // Keep iterating through all attributes
41444344
});
41454345

4146-
if (!gdb_type.empty() && !(encoding_set || format_set)) {
4147-
if (llvm::StringRef(gdb_type).startswith("int")) {
4148-
reg_info.format = eFormatHex;
4149-
reg_info.encoding = eEncodingUint;
4150-
} else if (gdb_type == "data_ptr" || gdb_type == "code_ptr") {
4151-
reg_info.format = eFormatAddressInfo;
4152-
reg_info.encoding = eEncodingUint;
4153-
} else if (gdb_type == "float") {
4154-
reg_info.format = eFormatFloat;
4155-
reg_info.encoding = eEncodingIEEE754;
4156-
} else if (gdb_type == "aarch64v" ||
4157-
llvm::StringRef(gdb_type).startswith("vec") ||
4158-
gdb_type == "i387_ext" || gdb_type == "uint128") {
4159-
// lldb doesn't handle 128-bit uints correctly (for ymm*h), so treat
4160-
// them as vector (similarly to xmm/ymm)
4161-
reg_info.format = eFormatVectorOfUInt8;
4162-
reg_info.encoding = eEncodingVector;
4163-
} else {
4164-
LLDB_LOGF(
4165-
log,
4166-
"ProcessGDBRemote::ParseRegisters Could not determine lldb"
4167-
"format and encoding for gdb type %s",
4168-
gdb_type.c_str());
4346+
if (!gdb_type.empty()) {
4347+
// gdb_type could reference some flags type defined in XML.
4348+
llvm::StringMap<std::unique_ptr<RegisterFlags>>::iterator it =
4349+
registers_flags_types.find(gdb_type);
4350+
if (it != registers_flags_types.end())
4351+
reg_info.flags_type = it->second.get();
4352+
4353+
// There's a slim chance that the gdb_type name is both a flags type
4354+
// and a simple type. Just in case, look for that too (setting both
4355+
// does no harm).
4356+
if (!gdb_type.empty() && !(encoding_set || format_set)) {
4357+
if (llvm::StringRef(gdb_type).startswith("int")) {
4358+
reg_info.format = eFormatHex;
4359+
reg_info.encoding = eEncodingUint;
4360+
} else if (gdb_type == "data_ptr" || gdb_type == "code_ptr") {
4361+
reg_info.format = eFormatAddressInfo;
4362+
reg_info.encoding = eEncodingUint;
4363+
} else if (gdb_type == "float") {
4364+
reg_info.format = eFormatFloat;
4365+
reg_info.encoding = eEncodingIEEE754;
4366+
} else if (gdb_type == "aarch64v" ||
4367+
llvm::StringRef(gdb_type).startswith("vec") ||
4368+
gdb_type == "i387_ext" || gdb_type == "uint128") {
4369+
// lldb doesn't handle 128-bit uints correctly (for ymm*h), so
4370+
// treat them as vector (similarly to xmm/ymm)
4371+
reg_info.format = eFormatVectorOfUInt8;
4372+
reg_info.encoding = eEncodingVector;
4373+
} else {
4374+
LLDB_LOGF(
4375+
log,
4376+
"ProcessGDBRemote::ParseRegisters Could not determine lldb"
4377+
"format and encoding for gdb type %s",
4378+
gdb_type.c_str());
4379+
}
41694380
}
41704381
}
41714382

@@ -4297,8 +4508,8 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
42974508

42984509
if (arch_to_use.IsValid()) {
42994510
for (auto &feature_node : feature_nodes) {
4300-
ParseRegisters(feature_node, target_info,
4301-
registers);
4511+
ParseRegisters(feature_node, target_info, registers,
4512+
m_registers_flags_types);
43024513
}
43034514

43044515
for (const auto &include : target_info.includes) {
@@ -4363,6 +4574,13 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfo(ArchSpec &arch_to_use) {
43634574
if (!m_gdb_comm.GetQXferFeaturesReadSupported())
43644575
return false;
43654576

4577+
// This holds register flags information for the whole of target.xml.
4578+
// target.xml may include further documents that
4579+
// GetGDBServerRegisterInfoXMLAndProcess will recurse to fetch and process.
4580+
// That's why we clear the cache here, and not in
4581+
// GetGDBServerRegisterInfoXMLAndProcess. To prevent it being cleared on every
4582+
// include read.
4583+
m_registers_flags_types.clear();
43664584
std::vector<DynamicRegisterInfo::Register> registers;
43674585
if (GetGDBServerRegisterInfoXMLAndProcess(arch_to_use, "target.xml",
43684586
registers))

lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "GDBRemoteRegisterContext.h"
3939

4040
#include "llvm/ADT/DenseMap.h"
41+
#include "llvm/ADT/StringMap.h"
4142

4243
namespace lldb_private {
4344
namespace repro {
@@ -466,6 +467,15 @@ class ProcessGDBRemote : public Process,
466467
// fork helpers
467468
void DidForkSwitchSoftwareBreakpoints(bool enable);
468469
void DidForkSwitchHardwareTraps(bool enable);
470+
471+
// Lists of register fields generated from the remote's target XML.
472+
// Pointers to these RegisterFlags will be set in the register info passed
473+
// back to the upper levels of lldb. Doing so is safe because this class will
474+
// live at least as long as the debug session. We therefore do not store the
475+
// data directly in the map because the map may reallocate it's storage as new
476+
// entries are added. Which would invalidate any pointers set in the register
477+
// info up to that point.
478+
llvm::StringMap<std::unique_ptr<RegisterFlags>> m_registers_flags_types;
469479
};
470480

471481
} // namespace process_gdb_remote

lldb/source/Target/DynamicRegisterInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ size_t DynamicRegisterInfo::SetRegisterInfo(
407407
{reg.regnum_ehframe, reg.regnum_dwarf, reg.regnum_generic,
408408
reg.regnum_remote, local_regnum},
409409
// value_regs and invalidate_regs are filled by Finalize()
410-
nullptr, nullptr, nullptr
410+
nullptr, nullptr, reg.flags_type
411411
};
412412

413413
m_regs.push_back(reg_info);

0 commit comments

Comments
 (0)