|
| 1 | +# synthetic children provider for CFString |
| 2 | +# (and related NSString class) |
| 3 | +import lldb |
class CFStringSynthProvider:
    """Synthetic children provider for CFString (and the related NSString class).

    The provider exposes six children.  Indices 0-4 ("mutable", "inline",
    "explicit", "unicode", "special") mirror the flags decoded from the
    CFString info bits and are for debugging only; they must not be used in
    production code.  Index 5 ("content") is the string data itself.
    """

    # names of the flag children, in child-index order; each one is also the
    # name of the instance attribute set by compute_flags()
    _FLAG_NAMES = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        """Remember the value being formatted and decode its flags.

        valobj -- the value object to provide children for
        dict   -- not used by this provider (the name is kept so the
                  constructor signature stays unchanged)
        """
        self.valobj = valobj
        self.update()

    def num_children(self):
        """Return the number of synthetic children (five flags + "content")."""
        return len(self._FLAG_NAMES) + 1

    # ------------------------------------------------------------------
    # small internal helpers
    # ------------------------------------------------------------------
    def _object_address(self):
        """Return the numeric value of self.valobj, used as the object address."""
        return int(self.valobj.GetValue(), 0)

    def _char_type(self):
        """Return the basic 'char' type as seen from self.valobj's type."""
        return self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)

    @staticmethod
    def _decode_utf16(raw, little_endian):
        """Decode a byte buffer of UTF-16 code units into a unicode string.

        Decoding the whole buffer at once (instead of converting every code
        unit on its own) keeps surrogate pairs together; malformed input is
        replaced by U+FFFD rather than raising.
        """
        codec = 'utf-16-le' if little_endian else 'utf-16-be'
        return bytes(raw).decode(codec, 'replace')

    @staticmethod
    def _char_expression(text):
        """Return the expression (char*)"<text>" with text safely escaped.

        Backslashes and double quotes are backslash-escaped and control
        characters are emitted as 3-digit octal escapes, so the text cannot
        terminate the string literal early or break the expression.
        None is treated as an empty string; byte strings are taken as UTF-8.
        """
        if text is None:
            text = u''
        elif isinstance(text, bytes):
            text = text.decode('utf-8', 'replace')
        pieces = []
        for ch in text:
            code = ord(ch)
            if ch == u'\\' or ch == u'"':
                pieces.append(u'\\' + ch)
            elif code < 0x20 or code == 0x7f:
                pieces.append(u'\\%03o' % code)
            else:
                pieces.append(ch)
        expression = u'(char*)"' + u''.join(pieces) + u'"'
        if str is bytes:
            # Python 2: hand a UTF-8 byte string to the expression evaluator,
            # as the original code did
            expression = expression.encode('utf-8')
        return expression

    def _content_from_text(self, text):
        """Create the "content" child from an already-read string."""
        return self.valobj.CreateValueFromExpression(
            "content", self._char_expression(text))

    # ------------------------------------------------------------------
    # reading string data
    # ------------------------------------------------------------------
    def read_unicode(self, pointer):
        """Read a NUL-terminated run of 16-bit code units starting at pointer.

        Returns the decoded unicode string.  Reading stops at the first
        all-zero code unit, or as soon as a memory read fails or comes back
        short; whatever was read up to that point is returned.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while True:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                # unreadable memory: stop instead of crashing on bad data
                break
            unit = bytearray(content)
            if unit[0] == 0 and unit[1] == 0:
                break
            raw += unit[:2]
            pointer += 2
        # bytes are interpreted according to the target's endianness
        # (do we really need this or is Cocoa going to use
        # Windows-compatible little-endian even if the target is big endian?)
        return self._decode_utf16(raw, self.is_little)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Return "content" for a string whose info bits are all zero."""
        if not self.lp64:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        return self._content_from_text(
            self.read_unicode(self._object_address() + offset))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Return "content" built from the object description of the value."""
        return self._content_from_text(self.valobj.GetObjectDescription())

    def handle_unicode_string(self):
        """Return "content" for a string flagged as Unicode."""
        # step 1: find the address of the character data
        pointer = self._object_address() + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # not sure why 8 bytes are skipped here
            # (lldb) mem read -c 50 0x00000001001154f0
            # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00 ...q?...........
            # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00 ........?.x.x...
            # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
            # 0x100115520: 00 00 ..
            # content begins at * (i.e. 8 bytes into variants, skipping void* buffer in
            # __notInlineImmutable1 entirely, while the length byte is correctly located
            # for an inline string)
            pointer += 8
        else:
            # read a pointer-sized value here and make an address out of it.
            # 'pointer' is an absolute address, so the value must be created
            # from that address rather than as a child at a relative offset
            vopointer = self.valobj.CreateValueFromAddress(
                "dummy", pointer, self._char_type().GetPointerType())
            buffer_address = vopointer.GetValue()
            if buffer_address is None:
                # could not read the buffer pointer: use the safe code path
                return self.handle_unicode_string_safe()
            pointer = int(buffer_address, 0)
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return self._content_from_text(pystr)

    # we read at "the right place" into the __CFString object instead of running code
    # we are replicating the functionality of __CFStrContents in CFString.c here
    def handle_UTF8_inline(self):
        """Return "content" for a non-Unicode string stored inside the object."""
        address = self._object_address() + self.size_of_cfruntime_base()
        if not self.explicit:
            # a length byte precedes the data and has to be skipped
            address += 1
        return self.valobj.CreateValueFromAddress(
            "content", address, self._char_type())

    def handle_UTF8_not_inline(self):
        """Return "content" for a non-Unicode string stored out of line."""
        return self.valobj.CreateChildAtOffset(
            "content",
            self.size_of_cfruntime_base(),
            self._char_type().GetPointerType())

    # ------------------------------------------------------------------
    # synthetic children interface
    # ------------------------------------------------------------------
    def get_child_at_index(self, index):
        """Return the child at index, or None when index is not in 0..5."""
        if index in range(len(self._FLAG_NAMES)):
            name = self._FLAG_NAMES[index]
            return self.valobj.CreateValueFromExpression(
                name, str(int(getattr(self, name))))
        if index == len(self._FLAG_NAMES):
            # "content": pick the reader matching the decoded flags
            if self.unicode:
                return self.handle_unicode_string()
            if self.special:
                return self.handle_special()
            if self.inline:
                return self.handle_UTF8_inline()
            return self.handle_UTF8_not_inline()
        return None

    def get_child_index(self, name):
        """Return the index of the child called name, or None if unknown."""
        if name == "content":
            return self.num_children() - 1
        if name in self._FLAG_NAMES:
            return self._FLAG_NAMES.index(name)
        return None

    # ------------------------------------------------------------------
    # architecture details
    # ------------------------------------------------------------------
    def is_64bit(self):
        """Return True when the process uses 8-byte addresses."""
        return self.valobj.GetTarget().GetProcess().GetAddressByteSize() == 8

    def is_little_endian(self):
        """Return True when the process byte order is little endian."""
        process = self.valobj.GetTarget().GetProcess()
        return process.GetByteOrder() == lldb.eByteOrderLittle

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes of the CFRuntimeBase header (16 or 8)."""
        if self.lp64:
            return 8 + 4 + 4
        return 4 + 4

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits inside the object."""
        offset = 8 if self.lp64 else 4
        if not self.is_little:
            offset += 3
        return offset

    def read_info_bits(self):
        """Read the info-bits byte from the object and return it as an int.

        Raises TypeError if the byte has no readable value (GetValue()
        returns None), exactly as the unguarded int() call always did.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo", self.offset_of_info_bits(), self._char_type())
        # hex formatting so that int(..., 0) can parse the value text
        cfinfo.SetFormat(lldb.eFormatHex)
        return int(cfinfo.GetValue(), 0)

    # ------------------------------------------------------------------
    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    # ------------------------------------------------------------------
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when neither of the 0x60 bits is set."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """Return True when the info bits are all zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when the 0x10 bit of the info bits is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache pointer width (self.lp64) and byte order (self.is_little)."""
        self.lp64 = self.is_64bit()
        self.is_little = self.is_little_endian()

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive every flag attribute from them."""
        self.info_bits = self.read_info_bits()
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and the decoded flags."""
        self.adjust_for_architecture()
        self.compute_flags()
0 commit comments