Skip to content

Commit 2152ae9

Browse files
committed
[lit] Make internal diff work in pipelines
When using lit's internal shell, RUN lines like the following accidentally execute an external `diff` instead of lit's internal `diff`:

```
# RUN: program | diff file -
# RUN: not diff file1 file2 | FileCheck %s
```

Such cases exist now, in `clang/test/Analysis` for example. We are preparing patches to ensure lit's internal `diff` is called in such cases, which will then fail because lit's internal `diff` cannot currently be used in pipelines and doesn't recognize `-` as a command-line option.

To enable pipelines, this patch moves lit's `diff` implementation into an out-of-process script, similar to lit's `cat` implementation. A follow-up patch will implement `-` to mean stdin.

Reviewed By: probinson, stella.stamenova

Differential Revision: https://reviews.llvm.org/D66574

llvm-svn: 372035
1 parent c1c519d commit 2152ae9

File tree

5 files changed

+276
-241
lines changed

5 files changed

+276
-241
lines changed

llvm/utils/lit/lit/TestRunner.py

Lines changed: 1 addition & 223 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from __future__ import absolute_import
2-
import difflib
32
import errno
4-
import functools
53
import io
64
import itertools
75
import getopt
@@ -361,218 +359,6 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
361359
exitCode = 1
362360
return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
363361

364-
def executeBuiltinDiff(cmd, cmd_shenv):
    """executeBuiltinDiff - Compare files line by line.

    Accepts the options -w, -b, -u, -r and --strip-trailing-cr followed by
    exactly two operands. Relative operands are resolved against
    cmd_shenv.cwd.

    Returns a ShellCommandResult whose exit code is 0 when no difference was
    reported and 1 otherwise (including when an IOError occurred, in which
    case the error text is placed on stderr).

    Raises InternalShellError for an unsupported option or when the number of
    operands is not two.
    """
    args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        opts, args = getopt.gnu_getopt(args, "wbur", ["strip-trailing-cr"])
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'diff': %s" % str(err))

    ignore_all_space = False
    ignore_space_change = False
    unified_diff = False
    recursive_diff = False
    strip_trailing_cr = False
    for o, a in opts:
        if o == "-w":
            ignore_all_space = True
        elif o == "-b":
            ignore_space_change = True
        elif o == "-u":
            unified_diff = True
        elif o == "-r":
            recursive_diff = True
        elif o == "--strip-trailing-cr":
            strip_trailing_cr = True
        else:
            # getopt only returns options from the spec above, so this is an
            # internal invariant rather than input validation.
            assert False, "unhandled option"

    if len(args) != 2:
        raise InternalShellError(cmd, "Error: missing or extra operand")

    # The nested helpers below write their report into these buffers.
    stderr = StringIO()
    stdout = StringIO()

    def getDirTree(path, basedir=""):
        # Tree is a tuple of form (dirname, child_trees).
        # An empty dir has child_trees = [], a file has child_trees = None.
        for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)):
            child_trees = [getDirTree(child_dir, dirname)
                           for child_dir in child_dirs]
            child_trees.extend((filename, None) for filename in files)
            # Only the first os.walk entry (the directory itself) is needed:
            # sub-directories are handled by the explicit recursion above.
            return path, sorted(child_trees)
        # os.walk produced nothing, so 'path' is not a directory that could
        # be listed. Describe it as a file instead of returning None, which
        # the tree comparison cannot handle.
        return path, None

    def compareTwoFiles(filepaths):
        compare_bytes = False
        encoding = None
        for path in filepaths:
            # The content is read here only to find out how it can be
            # decoded; the actual comparison re-reads the files.
            try:
                with open(path, 'r') as f:
                    f.readlines()
            except UnicodeDecodeError:
                try:
                    with io.open(path, 'r', encoding="utf-8") as f:
                        f.readlines()
                    encoding = "utf-8"
                except UnicodeDecodeError:
                    # Not decodable as text: fall back to a bytes diff.
                    # Other errors (e.g. IOError) propagate to the caller.
                    compare_bytes = True

        if compare_bytes:
            return compareTwoBinaryFiles(filepaths)
        return compareTwoTextFiles(filepaths, encoding)

    def compareTwoBinaryFiles(filepaths):
        filelines = []
        for path in filepaths:
            with open(path, 'rb') as f:
                filelines.append(f.readlines())

        exitCode = 0
        if hasattr(difflib, 'diff_bytes'):
            # python 3.5 or newer
            diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0],
                                       filelines[1], filepaths[0].encode(),
                                       filepaths[1].encode())
            # These files are here because they are not valid text, so a
            # strict decode of the diff lines could raise. Escape the
            # undecodable bytes instead.
            diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
        else:
            # python 2.7
            func = difflib.unified_diff if unified_diff else difflib.context_diff
            diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])

        for diff in diffs:
            stdout.write(diff)
            exitCode = 1
        return exitCode

    def compareTwoTextFiles(filepaths, encoding):
        filelines = []
        for path in filepaths:
            if encoding is None:
                with open(path, 'r') as f:
                    filelines.append(f.readlines())
            else:
                with io.open(path, 'r', encoding=encoding) as f:
                    filelines.append(f.readlines())

        def stripTrailingCR(line):
            # A plain rstrip('\r') cannot reach the CR of a line that still
            # ends in '\r\n', so handle that form explicitly.
            if line.endswith('\r\n'):
                return line[:-2] + '\n'
            return line.rstrip('\r')

        # Build the list of per-line normalizations, applied in order.
        transforms = []
        if strip_trailing_cr:
            transforms.append(stripTrailingCR)
        if ignore_all_space or ignore_space_change:
            separator = '' if ignore_all_space else ' '
            transforms.append(lambda line: separator.join(line.split()))

        for transform in transforms:
            filelines = [[transform(line) for line in lines]
                         for lines in filelines]

        exitCode = 0
        func = difflib.unified_diff if unified_diff else difflib.context_diff
        for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]):
            stdout.write(diff)
            exitCode = 1
        return exitCode

    def printDirVsFile(dir_path, file_path):
        if os.path.getsize(file_path):
            msg = "File %s is a directory while file %s is a regular file"
        else:
            msg = "File %s is a directory while file %s is a regular empty file"
        stdout.write(msg % (dir_path, file_path) + "\n")

    def printFileVsDir(file_path, dir_path):
        if os.path.getsize(file_path):
            msg = "File %s is a regular file while file %s is a directory"
        else:
            msg = "File %s is a regular empty file while file %s is a directory"
        stdout.write(msg % (file_path, dir_path) + "\n")

    def printOnlyIn(basedir, path, name):
        stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name))

    def compareDirTrees(dir_trees, base_paths=("", "")):
        # Dirnames of the trees are not checked, it's caller's responsibility,
        # as top-level dirnames are always different. Base paths are important
        # for doing os.walk, but we don't put it into tree's dirname in order
        # to speed up string comparison below and while sorting in getDirTree.
        left_tree, right_tree = dir_trees[0], dir_trees[1]
        left_base, right_base = base_paths[0], base_paths[1]
        left_path = os.path.join(left_base, left_tree[0])
        right_path = os.path.join(right_base, right_tree[0])

        # Compare two files or report file vs. directory mismatch.
        if left_tree[1] is None and right_tree[1] is None:
            return compareTwoFiles([left_path, right_path])

        if left_tree[1] is None and right_tree[1] is not None:
            printFileVsDir(left_path, right_path)
            return 1

        if left_tree[1] is not None and right_tree[1] is None:
            printDirVsFile(left_path, right_path)
            return 1

        # Compare two directories via recursive use of compareDirTrees.
        exitCode = 0
        left_names = [node[0] for node in left_tree[1]]
        right_names = [node[0] for node in right_tree[1]]
        l, r = 0, 0
        while l < len(left_names) and r < len(right_names):
            # Names are sorted in getDirTree, rely on that order.
            if left_names[l] < right_names[r]:
                exitCode = 1
                printOnlyIn(left_base, left_tree[0], left_names[l])
                l += 1
            elif left_names[l] > right_names[r]:
                exitCode = 1
                printOnlyIn(right_base, right_tree[0], right_names[r])
                r += 1
            else:
                exitCode |= compareDirTrees([left_tree[1][l], right_tree[1][r]],
                                            [left_path, right_path])
                l += 1
                r += 1

        # At least one of the trees has ended. Report names from the other tree.
        while l < len(left_names):
            exitCode = 1
            printOnlyIn(left_base, left_tree[0], left_names[l])
            l += 1
        while r < len(right_names):
            exitCode = 1
            printOnlyIn(right_base, right_tree[0], right_names[r])
            r += 1
        return exitCode

    filepaths = []
    dir_trees = []
    exitCode = 0
    try:
        for path in args:
            if not os.path.isabs(path):
                path = os.path.realpath(os.path.join(cmd_shenv.cwd, path))

            if recursive_diff:
                dir_trees.append(getDirTree(path))
            else:
                filepaths.append(path)

        if not recursive_diff:
            exitCode = compareTwoFiles(filepaths)
        else:
            exitCode = compareDirTrees(dir_trees)

    except IOError as err:
        stderr.write("Error: 'diff' command failed, %s\n" % str(err))
        exitCode = 1

    return ShellCommandResult(cmd, stdout.getvalue(), stderr.getvalue(), exitCode, False)
575-
576362
def executeBuiltinRm(cmd, cmd_shenv):
577363
"""executeBuiltinRm - Removes (deletes) files or directories."""
578364
args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
@@ -838,14 +624,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
838624
results.append(cmdResult)
839625
return cmdResult.exitCode
840626

841-
if cmd.commands[0].args[0] == 'diff':
842-
if len(cmd.commands) != 1:
843-
raise InternalShellError(cmd.commands[0], "Unsupported: 'diff' "
844-
"cannot be part of a pipeline")
845-
cmdResult = executeBuiltinDiff(cmd.commands[0], shenv)
846-
results.append(cmdResult)
847-
return cmdResult.exitCode
848-
849627
if cmd.commands[0].args[0] == 'rm':
850628
if len(cmd.commands) != 1:
851629
raise InternalShellError(cmd.commands[0], "Unsupported: 'rm' "
@@ -866,7 +644,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
866644
stderrTempFiles = []
867645
opened_files = []
868646
named_temp_files = []
869-
builtin_commands = set(['cat'])
647+
builtin_commands = set(['cat', 'diff'])
870648
builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands")
871649
# To avoid deadlock, we use a single stderr stream for piped
872650
# output. This is null until we have seen some output using

0 commit comments

Comments
 (0)