Skip to content

Commit b5cfd0a

Browse files
committed
[cmpcodesize] Begin extracting regular expressions
`cmpcodesize.compare` has a few responsibilities: matching regular expressions on otool output, and storing the results of those matches in dictionaries. Begin extracting the regular expression matching into a separate file, `cmpcodesize/regex.py`. This makes the code more modular, and allows for finer-grained unit tests.
1 parent b5fbe7f commit b5cfd0a

File tree

3 files changed

+76
-15
lines changed

3 files changed

+76
-15
lines changed

utils/cmpcodesize/cmpcodesize/compare.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import collections
44
from operator import itemgetter
55

6-
from cmpcodesize import otool
6+
from cmpcodesize import otool, regex
77

88
Prefixes = {
99
# Cpp
@@ -68,30 +68,24 @@ def addFunction(sizes, function, startAddr, endAddr, groupByPrefix):
6868
def readSizes(sizes, fileName, functionDetails, groupByPrefix):
6969
# Check if multiple architectures are supported by the object file.
7070
# Prefer arm64 if available.
71-
architectures = otool.fat_headers(fileName).split('\n')
72-
arch = None
73-
archPattern = re.compile('architecture ([\S]+)')
74-
for architecture in architectures:
75-
archMatch = archPattern.match(architecture)
76-
if archMatch:
77-
if arch is None:
78-
arch = archMatch.group(1)
79-
if "arm64" in arch:
80-
arch = "arm64"
81-
71+
fat_headers = otool.fat_headers(fileName)
72+
architecture = regex.architecture(fat_headers)
8273
if functionDetails:
8374
content = otool.load_commands(fileName,
84-
architecture=arch,
75+
architecture=architecture,
8576
include_text_sections=True).split('\n')
86-
content += otool.text_sections(fileName, architecture=arch).split('\n')
77+
content += otool.text_sections(fileName,
78+
architecture=architecture).split('\n')
8779
else:
88-
content = otool.load_commands(fileName, architecture=arch).split('\n')
80+
content = otool.load_commands(fileName,
81+
architecture=architecture).split('\n')
8982

9083
sectName = None
9184
currFunc = None
9285
startAddr = None
9386
endAddr = None
9487

88+
# FIXME: Move re calls into cmpcodesize.regex module.
9589
sectionPattern = re.compile(' +sectname ([\S]+)')
9690
sizePattern = re.compile(' +size ([\da-fx]+)')
9791
asmlinePattern = re.compile('^([0-9a-fA-F]+)\s')
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import re
2+
3+
4+
# Cache the compiled regex into a global object so repeated calls reuse it.
# A raw string is used because '\S' is not a valid string escape sequence
# and triggers a warning on modern Python versions.
_ARCHITECTURE_REGEX = re.compile(r'architecture (\S+)')


def architecture(fat_headers):
    """
    Given a string representing fat headers from an executable,
    returns one of the following:

    1. arm64, if that is one of the architectures listed.
    2. If arm64 is not listed, the first architecture that is listed.
    3. None, if no architectures are listed.
    """
    result = None
    for line in fat_headers.splitlines():
        # match() anchors at the start of the line, so only lines that
        # begin with 'architecture ' are considered.
        match = _ARCHITECTURE_REGEX.match(line)
        if not match:
            continue
        arch = match.group(1)
        if arch == 'arm64':
            # arm64 always wins, regardless of where it is listed.
            return arch
        if result is None:
            # Remember only the first architecture seen as the fallback.
            result = arch
    return result

utils/cmpcodesize/tests/test_regex.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import unittest
2+
3+
from cmpcodesize import regex
4+
5+
6+
class ArchitectureTestCase(unittest.TestCase):
    """Unit tests for ``regex.architecture``."""

    def test_no_architectures_listed_returns_none(self):
        # No line starts with 'architecture', so nothing can be selected.
        headers = ('Fat headers\n'
                   ' cputype CPU_TYPE_X86_64\n')
        self.assertIsNone(regex.architecture(headers))

    def test_arm64_listed_returns_arm64(self):
        # arm64 is preferred even though it is not listed first.
        headers = ('Fat headers\n'
                   'architecture x86_64\n'
                   ' cputype CPU_TYPE_X86_64\n'
                   'architecture arm64\n'
                   ' cputype CPU_TYPE_ARM64\n')
        self.assertEqual(regex.architecture(headers), 'arm64')

    def test_arm64_not_listed_returns_first(self):
        # Without arm64, the first listed architecture is returned.
        headers = ('Fat headers\n'
                   'architecture x86_64\n'
                   ' cputype CPU_TYPE_X86_64\n'
                   'architecture i386\n'
                   ' cputype CPU_TYPE_I386\n')
        self.assertEqual(regex.architecture(headers), 'x86_64')

    def test_libswiftcore_x86_64(self):
        # These are the headers for libswiftCore.dylib when built
        # for a Darwin x86_64 target.
        headers = ('Fat headers\n'
                   'fat_magic FAT_MAGIC\n'
                   'nfat_arch 1\n'
                   'architecture x86_64\n'
                   ' cputype CPU_TYPE_X86_64\n'
                   ' cpusubtype CPU_SUBTYPE_X86_64_ALL\n'
                   ' capabilities 0x0\n'
                   ' offset 4096\n'
                   ' size 9029488\n'
                   ' align 2^12 (4096)\n')
        self.assertEqual(regex.architecture(headers), 'x86_64')

0 commit comments

Comments
 (0)