Skip to content

Improve htmldocck.py error messages #30303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 10, 2015
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 86 additions & 54 deletions src/etc/htmldocck.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@

"""

from __future__ import print_function
import sys
import os.path
import re
Expand Down Expand Up @@ -160,8 +161,13 @@ def close(self):
HTMLParser.close(self)
return self.__builder.close()

# NOTE(review): `Command` is bound twice in a row. The second assignment
# rebinds the name, so the five-field tuple (with `context`) is the one in
# effect; the first line looks like the pre-change side of the diff -- confirm.
Command = namedtuple('Command', 'negated cmd args lineno')
Command = namedtuple('Command', 'negated cmd args lineno context')

class FailedCheck(Exception):
    """Raised when a check ran but its expectation was not met.

    The exception text (``str(exc)``) carries the reason shown to the user.
    """

class InvalidCheck(Exception):
    """Raised when a check itself is malformed or unsupported.

    The exception text (``str(exc)``) carries the reason shown to the user.
    """

def concat_multi_lines(f):
"""returns a generator out of the file object, which
Expand Down Expand Up @@ -196,7 +202,7 @@ def concat_multi_lines(f):
catenated = ''

if lastline is not None:
raise RuntimeError('Trailing backslash in the end of file')
print_err(lineno, line, 'Trailing backslash at the end of the file')

LINE_PATTERN = re.compile(r'''
(?<=(?<!\S)@)(?P<negated>!?)
Expand All @@ -216,9 +222,10 @@ def get_commands(template):
cmd = m.group('cmd')
args = m.group('args')
if args and not args[:1].isspace():
raise RuntimeError('Invalid template syntax at line {}'.format(lineno+1))
print_err(lineno, line, 'Invalid template syntax')
continue
args = shlex.split(args)
yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1)
yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1, context=line)


def _flatten(node, acc):
Expand All @@ -242,8 +249,7 @@ def normalize_xpath(path):
elif path.startswith('.//'):
return path
else:
raise RuntimeError('Non-absolute XPath is not supported due to \
the implementation issue.')
raise InvalidCheck('Non-absolute XPath is not supported due to implementation issues')


class CachedFiles(object):
Expand All @@ -259,41 +265,40 @@ def resolve_path(self, path):
self.last_path = path
return path
elif self.last_path is None:
raise RuntimeError('Tried to use the previous path in the first command')
raise InvalidCheck('Tried to use the previous path in the first command')
else:
return self.last_path

def get_file(self, path):
    """Return the text of ``path`` (relative to ``self.root``), cached.

    ``path`` is first passed through ``self.resolve_path``. The contents are
    read once and memoised in ``self.files``.

    Raises:
        FailedCheck: if the resolved path is not an existing regular file.
    """
    path = self.resolve_path(path)
    if path in self.files:
        return self.files[path]

    abspath = os.path.join(self.root, path)
    # isfile() is already False for a missing path, so a separate
    # exists() test is unnecessary.
    if not os.path.isfile(abspath):
        raise FailedCheck('File does not exist {!r}'.format(path))

    with open(abspath) as f:
        data = f.read()
    self.files[path] = data
    return data

def get_tree(self, path):
    """Return the parsed tree for ``path`` (relative to ``self.root``), cached.

    ``path`` is first passed through ``self.resolve_path``. The file is
    parsed once with ``CustomHTMLParser`` and memoised in ``self.trees``.

    Raises:
        FailedCheck: if the resolved path is not an existing regular file.
        RuntimeError: if the file cannot be parsed.
    """
    path = self.resolve_path(path)
    if path in self.trees:
        return self.trees[path]

    abspath = os.path.join(self.root, path)
    # isfile() is already False for a missing path, so a separate
    # exists() test is unnecessary.
    if not os.path.isfile(abspath):
        raise FailedCheck('File does not exist {!r}'.format(path))

    with open(abspath) as f:
        try:
            tree = ET.parse(f, CustomHTMLParser())
        except Exception as e:
            raise RuntimeError('Cannot parse an HTML file {!r}: {}'.format(path, e))
    self.trees[path] = tree
    return tree


def check_string(data, pat, regexp):
Expand All @@ -311,14 +316,14 @@ def check_tree_attr(tree, path, attr, pat, regexp):
path = normalize_xpath(path)
ret = False
for e in tree.findall(path):
try:
if attr in e.attrib:
value = e.attrib[attr]
except KeyError:
continue
else:
ret = check_string(value, pat, regexp)
if ret:
break
continue

ret = check_string(value, pat, regexp)
if ret:
break
return ret


Expand All @@ -341,57 +346,84 @@ def check_tree_count(tree, path, count):
path = normalize_xpath(path)
return len(tree.findall(path)) == count

def stderr(*args):
    """Print ``args`` to standard error, space-separated, with a newline."""
    print(*args, file=sys.stderr)

def check(target, commands):
cache = CachedFiles(target)
for c in commands:
def print_err(lineno, context, err, message=None):
    """Report one error on standard error and bump the global error counter.

    Output layout:
        ``<lineno>: <message or err>``
        a tab-indented ``err`` line, only when both ``message`` and ``err``
        are given
        a tab-indented ``context`` line, only when ``context`` is truthy

    Args:
        lineno: line number used as the prefix of the headline.
        context: text printed beneath the error for orientation; may be empty.
        err: the detailed error text.
        message: optional headline; when omitted, ``err`` is the headline.
    """
    global ERR_COUNT
    ERR_COUNT += 1
    stderr("{}: {}".format(lineno, message or err))
    if message and err:
        stderr("\t{}".format(err))

    if context:
        stderr("\t{}".format(context))

ERR_COUNT = 0

def check_command(c, cache):
    """Run a single command against the cached documentation files.

    Supported commands, selected by ``c.cmd`` and ``len(c.args)``:
        ``has <path>``                      file existence
        ``has|matches <path> <pat>``        string test on the file contents
        ``has|matches <path> <xpath> <pat>`` tree test (attribute when the
                                            xpath contains ``/@``, otherwise
                                            text)
        ``count <path> <xpath> <count>``    number of matching nodes

    A check fails when its result equals ``c.negated``. ``FailedCheck`` and
    ``InvalidCheck`` are not propagated: both are reported through
    ``print_err`` with ``c.lineno`` and ``c.context``. Any other exception
    raised by the helpers is left to propagate.

    Args:
        c: the command; its ``negated``, ``cmd``, ``args``, ``lineno`` and
            ``context`` fields are read.
        cache: object providing ``get_file(path)`` and ``get_tree(path)``.
    """
    try:
        cerr = ""
        if c.cmd == 'has' or c.cmd == 'matches':  # string test
            regexp = (c.cmd == 'matches')
            if len(c.args) == 1 and not regexp:  # @has <path> = file existence
                try:
                    cache.get_file(c.args[0])
                    ret = True
                except FailedCheck as err:
                    # str(err) rather than err.message: the latter does not
                    # exist on Python 3 exceptions.
                    cerr = str(err)
                    ret = False
            elif len(c.args) == 2:  # @has/matches <path> <pat> = string test
                cerr = "`PATTERN` did not match"
                ret = check_string(cache.get_file(c.args[0]), c.args[1], regexp)
            elif len(c.args) == 3:  # @has/matches <path> <pat> <match> = XML tree test
                cerr = "`XPATH PATTERN` did not match"
                tree = cache.get_tree(c.args[0])
                pat, sep, attr = c.args[1].partition('/@')
                if sep:  # attribute
                    ret = check_tree_attr(tree, pat, attr, c.args[2], regexp)
                else:  # normalized text
                    pat = c.args[1]
                    if pat.endswith('/text()'):
                        pat = pat[:-7]
                    ret = check_tree_text(tree, pat, c.args[2], regexp)
            else:
                raise InvalidCheck('Invalid number of @{} arguments'.format(c.cmd))

        elif c.cmd == 'count':  # count test
            if len(c.args) == 3:  # @count <path> <pat> <count> = count test
                try:
                    expected = int(c.args[2])
                except ValueError:
                    # Report a malformed count like any other bad command
                    # instead of aborting the whole run with a traceback.
                    raise InvalidCheck('Invalid @count value {!r}'.format(c.args[2]))
                ret = check_tree_count(cache.get_tree(c.args[0]), c.args[1], expected)
            else:
                raise InvalidCheck('Invalid number of @{} arguments'.format(c.cmd))

        elif c.cmd == 'valid-html':
            raise InvalidCheck('Unimplemented @valid-html')

        elif c.cmd == 'valid-links':
            raise InvalidCheck('Unimplemented @valid-links')

        else:
            raise InvalidCheck('Unrecognized @{}'.format(c.cmd))

        if ret == c.negated:
            raise FailedCheck(cerr)

    except FailedCheck as err:
        message = '@{}{} check failed'.format('!' if c.negated else '', c.cmd)
        print_err(c.lineno, c.context, str(err), message)
    except InvalidCheck as err:
        print_err(c.lineno, c.context, str(err))

def check(target, commands):
    """Run every command in ``commands`` against the docs rooted at ``target``.

    A single ``CachedFiles`` instance is shared across all commands so each
    file is read or parsed at most once.
    """
    cache = CachedFiles(target)
    for c in commands:
        check_command(c, cache)

if __name__ == '__main__':
    # Exactly two arguments are required: the doc directory and the template.
    if len(sys.argv) != 3:
        stderr('Usage: {} <doc dir> <template>'.format(sys.argv[0]))
        raise SystemExit(1)

    # Run all checks once; failures are accumulated in ERR_COUNT rather than
    # aborting at the first one.
    check(sys.argv[1], get_commands(sys.argv[2]))
    if ERR_COUNT:
        stderr("\nEncountered {} errors".format(ERR_COUNT))
        raise SystemExit(1)