Skip to content

Commit d59c9cd

Browse files
committed
[lit] Fix some convoluted logic around Unicode encoding, and de-duplicate across modules that used it.
Summary: In Python2 and Python3, the various (non-)?Unicode string types are sort of spaghetti. Python2 has unicode support tacked on via the 'unicode' type, which is distinct from 'str' (which are bytes). Python3 takes the "unicode-everywhere" approach, with 'str' representing a Unicode string. Both have a 'bytes' type. In Python3, it is the only way to represent raw bytes. However, in Python2, 'bytes' is an alias for 'str'. This leads to interesting problems when an interface requires a precise type, but has to run under both Python2 and Python3. The previous logic appeared to be correct in all cases, but went through more layers of indirection than necessary. This change does the necessary conversions in one shot, with documentation about which paths might be taken in Python2 or Python3. Reviewers: zturner, modocache Subscribers: llvm-commits, sanjoy Differential Revision: https://reviews.llvm.org/D34793 llvm-svn: 306625
1 parent 17277f1 commit d59c9cd

File tree

2 files changed

+62
-51
lines changed

2 files changed

+62
-51
lines changed

llvm/utils/lit/lit/formats/googletest.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,22 @@ def getGTestTests(self, path, litConfig, localConfig):
3030
localConfig: TestingConfig instance"""
3131

3232
try:
33-
lines = lit.util.capture([path, '--gtest_list_tests'],
34-
env=localConfig.environment)
35-
if kIsWindows:
36-
lines = lines.replace('\r', '')
37-
lines = lines.split('\n')
38-
except Exception as exc:
39-
out = exc.output if isinstance(exc, subprocess.CalledProcessError) else ''
40-
litConfig.warning("unable to discover google-tests in %r: %s. Process output: %s"
41-
% (path, sys.exc_info()[1], out))
33+
output = subprocess.check_output([path, '--gtest_list_tests'],
34+
env=localConfig.environment)
35+
except subprocess.CalledProcessError as exc:
36+
litConfig.warning(
37+
"unable to discover google-tests in %r: %s. Process output: %s"
38+
% (path, sys.exc_info()[1], exc.output))
4239
raise StopIteration
4340

4441
nested_tests = []
45-
for ln in lines:
42+
for ln in output.splitlines(False): # Don't keep newlines.
43+
if 'Running main() from gtest_main.cc' in ln:
44+
# Upstream googletest prints this to stdout prior to running
45+
# tests. LLVM removed that print statement in r61540, but we
46+
# handle it here in case upstream googletest is being used.
47+
continue
48+
4649
# The test name list includes trailing comments beginning with
4750
# a '#' on some lines, so skip those. We don't support test names
4851
# that use escaping to embed '#' into their name as the names come
@@ -52,12 +55,6 @@ def getGTestTests(self, path, litConfig, localConfig):
5255
if not ln.lstrip():
5356
continue
5457

55-
if 'Running main() from gtest_main.cc' in ln:
56-
# Upstream googletest prints this to stdout prior to running
57-
# tests. LLVM removed that print statement in r61540, but we
58-
# handle it here in case upstream googletest is being used.
59-
continue
60-
6158
index = 0
6259
while ln[index*2:index*2+2] == ' ':
6360
index += 1

llvm/utils/lit/lit/util.py

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,52 @@
88
import sys
99
import threading
1010

11-
def to_bytes(str):
12-
# Encode to UTF-8 to get binary data.
13-
if isinstance(str, bytes):
14-
return str
15-
return str.encode('utf-8')
16-
17-
def to_string(bytes):
18-
if isinstance(bytes, str):
19-
return bytes
20-
return to_bytes(bytes)
21-
22-
def convert_string(bytes):
11+
def to_bytes(s):
12+
"""Return the parameter as type 'bytes', possibly encoding it.
13+
14+
In Python2, the 'bytes' type is the same as 'str'. In Python3, they are
15+
distinct.
16+
"""
17+
if isinstance(s, bytes):
18+
# In Python2, this branch is taken for both 'str' and 'bytes'.
19+
# In Python3, this branch is taken only for 'bytes'.
20+
return s
21+
# In Python2, 's' is a 'unicode' object.
22+
# In Python3, 's' is a 'str' object.
23+
# Encode to UTF-8 to get 'bytes' data.
24+
return s.encode('utf-8')
25+
26+
def to_string(b):
27+
"""Return the parameter as type 'str', possibly encoding it.
28+
29+
In Python2, the 'str' type is the same as 'bytes'. In Python3, the
30+
'str' type is (essentially) Python2's 'unicode' type, and 'bytes' is
31+
distinct.
32+
"""
33+
if isinstance(b, str):
34+
# In Python2, this branch is taken for types 'str' and 'bytes'.
35+
# In Python3, this branch is taken only for 'str'.
36+
return b
37+
if isinstance(b, bytes):
38+
# In Python2, this branch is never taken ('bytes' is handled as 'str').
39+
# In Python3, this is true only for 'bytes'.
40+
return b.decode('utf-8')
41+
42+
# By this point, here's what we *don't* have:
43+
#
44+
# - In Python2:
45+
# - 'str' or 'bytes' (1st branch above)
46+
# - In Python3:
47+
# - 'str' (1st branch above)
48+
# - 'bytes' (2nd branch above)
49+
#
50+
# The last type we might expect is the Python2 'unicode' type. There is no
51+
# 'uncode' type in Python3 (all the Python3 cases were already handled). In
52+
# order to get a 'str' object, we need to encode the 'unicode' object.
2353
try:
24-
return to_string(bytes.decode('utf-8'))
25-
except AttributeError: # 'str' object has no attribute 'decode'.
26-
return str(bytes)
27-
except UnicodeError:
28-
return str(bytes)
54+
return b.encode('utf-8')
55+
except AttributeError:
56+
raise TypeError('not sure how to convert %s to %s' % (type(b), str))
2957

3058
def detectCPUs():
3159
"""
@@ -39,7 +67,8 @@ def detectCPUs():
3967
if isinstance(ncpus, int) and ncpus > 0:
4068
return ncpus
4169
else: # OSX:
42-
return int(capture(['sysctl', '-n', 'hw.ncpu']))
70+
return int(subprocess.check_output(['sysctl', '-n', 'hw.ncpu'],
71+
stderr=subprocess.STDOUT))
4372
# Windows:
4473
if "NUMBER_OF_PROCESSORS" in os.environ:
4574
ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
@@ -67,21 +96,6 @@ def mkdir_p(path):
6796
if e.errno != errno.EEXIST:
6897
raise
6998

70-
def capture(args, env=None):
71-
"""capture(command) - Run the given command (or argv list) in a shell and
72-
return the standard output. Raises a CalledProcessError if the command
73-
exits with a non-zero status."""
74-
p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
75-
env=env)
76-
out, err = p.communicate()
77-
out = convert_string(out)
78-
err = convert_string(err)
79-
if p.returncode != 0:
80-
raise subprocess.CalledProcessError(cmd=args,
81-
returncode=p.returncode,
82-
output="{}\n{}".format(out, err))
83-
return out
84-
8599
def which(command, paths = None):
86100
"""which(command, [paths]) - Look up the given command in the paths string
87101
(or the PATH environment variable, if unspecified)."""
@@ -233,8 +247,8 @@ def killProcess():
233247
timerObject.cancel()
234248

235249
# Ensure the resulting output is always of string type.
236-
out = convert_string(out)
237-
err = convert_string(err)
250+
out = to_string(out)
251+
err = to_string(err)
238252

239253
if hitTimeOut[0]:
240254
raise ExecuteCommandTimeoutException(

0 commit comments

Comments
 (0)