Skip to content

Commit ebbe564

Browse files
committed
new synthetic children provider for CFString and related classes ; test case for it
llvm-svn: 136525
1 parent b081ba6 commit ebbe564

File tree

4 files changed

+577
-3
lines changed

4 files changed

+577
-3
lines changed

lldb/examples/synthetic/CFString.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# synthetic children provider for CFString
2+
# (and related NSString class)
3+
import lldb
4+
class CFStringSynthProvider:
    """LLDB synthetic children provider for CFString (and related NSString).

    The provider exposes six children.  The first five ("mutable",
    "inline", "explicit", "unicode", "special") mirror the flag bits
    decoded from the object's info byte; they are for debugging only and
    must not be used in production code.  The last child, "content",
    holds the string data.
    """

    # Names of the debugging-only flag children, in child-index order.
    # Each name is also the instance attribute that stores the flag.
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    # Characters that cannot appear verbatim inside a C string literal.
    _C_ESCAPES = {
        "\\": "\\\\",
        "\"": "\\\"",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }

    def __init__(self, valobj, dict):
        """Store the value object being formatted and decode its flags.

        `dict` is accepted because the caller passes it positionally; it
        is not used.
        """
        self.valobj = valobj
        self.update()

    # ------------------------------------------------------------------
    # pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _char_ptr_expression(text):
        """Return the expression `(char*)"<text>"` with `text` escaped.

        Backslashes, double quotes, newlines, carriage returns and tabs
        are escaped so that string data read from the debuggee cannot
        terminate the literal early or change the meaning of the
        expression.  `None` is treated as an empty string.
        """
        if text is None:
            text = ""
        escapes = CFStringSynthProvider._C_ESCAPES
        escaped = "".join([escapes.get(ch, ch) for ch in text])
        expression = "(char*)\"" + escaped + "\""
        if not isinstance(expression, str):
            # Python 2 only: a unicode object has to be handed over as a
            # UTF-8 byte string.
            expression = expression.encode('utf-8')
        return expression

    @staticmethod
    def _decode_utf16(raw, little_endian):
        """Decode a buffer of UTF-16 code units into text.

        Using the codec (rather than converting one code unit at a time)
        combines surrogate pairs into a single character; malformed
        sequences are replaced instead of raising.
        """
        codec = 'utf-16-le' if little_endian else 'utf-16-be'
        return bytes(raw).decode(codec, 'replace')

    def _object_address(self):
        """Return the address held by the value object as an integer."""
        return int(self.valobj.GetValue(), 0)

    def _char_pointer_type(self):
        """Return the `char*` type derived from the value object's type."""
        char_type = self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)
        return char_type.GetPointerType()

    # ------------------------------------------------------------------
    # synthetic children protocol
    # ------------------------------------------------------------------
    # children other than "content" are for debugging only and must not
    # be used in production code
    def num_children(self):
        """Return the number of children: the flag children plus "content"."""
        return len(self._FLAG_CHILDREN) + 1

    def get_child_at_index(self, index):
        """Return the child at `index`, or None when `index` is out of range.

        Indices 0-4 are the flag children (each rendered as 0 or 1);
        index 5 is "content", produced by the reader that matches the
        decoded flags.
        """
        flag_names = self._FLAG_CHILDREN
        if 0 <= index < len(flag_names):
            name = flag_names[index]
            return self.valobj.CreateValueFromExpression(
                name, str(int(getattr(self, name))))
        if index == len(flag_names):
            # the order of these checks matters: unicode wins over
            # special, which wins over the inline/not-inline UTF8 readers
            if self.unicode:
                return self.handle_unicode_string()
            if self.special:
                return self.handle_special()
            if self.inline:
                return self.handle_UTF8_inline()
            return self.handle_UTF8_not_inline()
        return None

    def get_child_index(self, name):
        """Return the index of the child called `name`, or None if unknown."""
        if name == "content":
            return self.num_children() - 1
        if name in self._FLAG_CHILDREN:
            return self._FLAG_CHILDREN.index(name)
        return None

    def update(self):
        """Refresh the architecture settings and the decoded flag bits."""
        self.adjust_for_architecture()
        self.compute_flags()

    # ------------------------------------------------------------------
    # reading string data
    # ------------------------------------------------------------------
    def read_unicode(self, pointer):
        """Read a NUL-terminated UTF-16 string starting at `pointer`.

        The data is read two bytes at a time until a zero code unit is
        found, because (per the original author) the length value has a
        weird encoding and reading everything at once is not safe.  A
        failed or short memory read ends the string instead of raising.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        while True:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                # unreadable memory: return what was collected so far
                break
            unit = bytearray(content)
            if unit[0] == 0 and unit[1] == 0:
                break
            raw.extend(unit[:2])
            pointer = pointer + 2
        # byte order follows the target's endianness
        # (do we really need this or is Cocoa going to use
        # Windows-compatible little-endian even if the target is big
        # endian?)
        return self._decode_utf16(raw, self.is_little)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return "content" for the special-case layout (info bits all 0)."""
        if not self.lp64:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pystr = self.read_unicode(self._object_address() + offset)
        return self.valobj.CreateValueFromExpression(
            "content", self._char_ptr_expression(pystr))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return "content" built from the object's description."""
        return self.valobj.CreateValueFromExpression(
            "content",
            self._char_ptr_expression(self.valobj.GetObjectDescription()))

    def handle_unicode_string(self):
        """Return "content" for a string whose unicode flag is set."""
        # step 1: find the address of the UTF-16 data
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # not sure why one pointer-sized word is skipped here
            # (lldb) mem read -c 50 0x00000001001154f0
            # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00 ...q?...........
            # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00 ........?.x.x...
            # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
            # 0x100115520: 00 00 ..
            # content begins at * (i.e. 8 bytes into variants, skipping
            # void* buffer in __notInlineImmutable1 entirely, while the
            # length byte is correctly located for an inline string)
            # NOTE(review): the dump above is from an LP64 target where
            # the skipped slot is 8 bytes; 4 bytes are assumed for 32bit
            # targets because the slot is described as a void* - confirm.
            skipped = 8 if self.lp64 else 4
            pointer = (self._object_address()
                       + self.size_of_cfruntime_base() + skipped)
        else:
            # the buffer pointer sits right after the CFRuntimeBase;
            # CreateChildAtOffset takes an offset relative to the object
            # (as handle_UTF8_not_inline does), not an absolute address
            vopointer = self.valobj.CreateChildAtOffset(
                "dummy",
                self.size_of_cfruntime_base(),
                self._char_pointer_type())
            pointer = int(vopointer.GetValue(), 0)
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self.valobj.CreateValueFromExpression(
            "content", self._char_ptr_expression(pystr))

    # we read at "the right place" into the __CFString object instead of
    # running code; we are replicating the functionality of
    # __CFStrContents in CFString.c here
    def handle_UTF8_inline(self):
        """Return "content" for a non-unicode string stored inline."""
        address = self._object_address() + self.size_of_cfruntime_base()
        if not self.explicit:
            # a length byte precedes the data and must be skipped
            address = address + 1
        return self.valobj.CreateValueFromAddress(
            "content",
            address,
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar))

    def handle_UTF8_not_inline(self):
        """Return "content" for a non-unicode string stored out of line."""
        return self.valobj.CreateChildAtOffset(
            "content",
            self.size_of_cfruntime_base(),
            self._char_pointer_type())

    # ------------------------------------------------------------------
    # architecture details
    # ------------------------------------------------------------------
    def is_64bit(self):
        """Return True when the process uses 8-byte addresses."""
        process = self.valobj.GetTarget().GetProcess()
        return process.GetAddressByteSize() == 8

    def is_little_endian(self):
        """Return True when the process byte order is little endian."""
        process = self.valobj.GetTarget().GetProcess()
        return process.GetByteOrder() == lldb.eByteOrderLittle

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes of CFRuntimeBase (16 on LP64, else 8)."""
        if self.lp64:
            return 8 + 4 + 4
        return 4 + 4

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the offset of the info byte from the start of the object."""
        offset = 8 if self.lp64 else 4
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info byte from the object and return it as an integer."""
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar))
        # presumably format 11 is hexadecimal (TODO confirm); the value
        # string is parsed below with base auto-detection
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        return int(info, 0)

    # ------------------------------------------------------------------
    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    # ------------------------------------------------------------------
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when neither bit of mask 0x60 is set."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to
    # identify this instead of reading the info_bits
    def is_special_case(self):
        """Return True when all info bits are zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when bit 0x10 of the info bits is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the pointer width and byte order of the process."""
        self.lp64 = self.is_64bit()
        self.is_little = self.is_little_endian()

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and cache every flag derived from them."""
        self.info_bits = self.read_info_bits()
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

0 commit comments

Comments
 (0)