Skip to content

gh-41872: Parse docstrings with ast instead of string manipulation #127520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
9e0835c
Instead of manual parsing of docstrings, use ast.literal_eval to par…
srinivasreddy Dec 2, 2024
d9fe8dd
Add tests for doc strings
srinivasreddy Dec 2, 2024
ab176ab
Remove unnecessary comments
srinivasreddy Dec 2, 2024
c21f342
Fix typo
srinivasreddy Dec 2, 2024
d750ee5
Update Lib/pydoc.py
srinivasreddy Dec 3, 2024
38b13e5
Add breathing space
srinivasreddy Dec 3, 2024
4bb399d
Fix name collision / shadowing
srinivasreddy Dec 3, 2024
272ca67
Change the logic
srinivasreddy Dec 3, 2024
5ef8adb
Fix typo
srinivasreddy Dec 3, 2024
e306542
Update Lib/pydoc.py
srinivasreddy Dec 5, 2024
6ff1ac3
Address review comments
srinivasreddy Dec 5, 2024
8c073a5
Correct the logic
srinivasreddy Dec 5, 2024
91df3f0
Merge branch 'main' into gh-41872
srinivasreddy Dec 17, 2024
f122cc6
Add blurb
srinivasreddy Dec 17, 2024
08cf0a5
Merge branch 'main' into gh-41872
srinivasreddy Dec 18, 2024
5e6a78b
Remove the redundant binary file check
srinivasreddy Dec 18, 2024
f54711e
Update test cases
srinivasreddy Dec 18, 2024
fb5dc83
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
bd0e7eb
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
233fbd6
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
ec7e431
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
c004506
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
204e5bb
Add commas
srinivasreddy Dec 19, 2024
dae74dd
Merge branch 'main' into gh-41872
srinivasreddy Jan 6, 2025
54519d9
Address review comments
srinivasreddy Jan 6, 2025
835fea2
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Jan 6, 2025
b7c42ea
Handle concatenated string, parentheses, newlines, and add more tests.
serhiy-storchaka Jan 7, 2025
6d6e983
Add more tests. Refactor tests.
serhiy-storchaka Jan 8, 2025
511b5d0
Update a NEWS entry.
serhiy-storchaka Jan 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions Lib/pydoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class or function within a module or module in a package. If the
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.

import ast
import __future__
import builtins
import importlib._bootstrap
Expand Down Expand Up @@ -384,21 +385,29 @@ def ispackage(path):
return False

def source_synopsis(file):
    """Return the one-line summary of a file object, if present.

    *file* must provide a ``readline()`` method returning str lines of
    Python source.  Only the first logical line is examined: after any
    leading comments and blank lines it must consist solely of string
    literals (implicitly concatenated and/or wrapped in parentheses).
    The literals are evaluated with ast.literal_eval(), so escape
    sequences, raw strings and every quoting style are honoured.

    Return the first line of the resulting docstring with surrounding
    whitespace stripped.  Return None if the source does not start with
    a str literal (for example a bytes literal, an f-string or any other
    expression), or if it cannot be tokenized, decoded or evaluated.
    """
    pieces = []
    try:
        tokens = tokenize.generate_tokens(file.readline)
        for tok_type, tok_string, _, _, _ in tokens:
            if tok_type == tokenize.STRING:
                pieces.append(tok_string)
            elif tok_type == tokenize.OP and tok_string in ('(', ')'):
                pieces.append(tok_string)
            elif tok_type == tokenize.NEWLINE:
                # End of the first logical line: evaluate what was collected.
                # Join with a space so that adjacent literals keep their
                # boundaries ('""' followed by '"doc"' must not turn into
                # the unterminated triple-quoted string '"""doc"').
                with warnings.catch_warnings():
                    # Ignore the "invalid escape sequence" warning.
                    warnings.simplefilter("ignore", SyntaxWarning)
                    docstring = ast.literal_eval(' '.join(pieces))
                if not isinstance(docstring, str):
                    return None
                return docstring.strip().split('\n')[0].strip()
            elif tok_type not in (tokenize.COMMENT, tokenize.NL,
                                  tokenize.ENCODING):
                # Any other token means the file does not start with a
                # docstring.
                return None
    except (tokenize.TokenError, UnicodeDecodeError, SyntaxError, ValueError):
        # ValueError is raised by ast.literal_eval() when the collected
        # tokens parse as a non-literal expression, e.g. ("a")("b").
        return None
    return None

def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""
Expand Down
77 changes: 77 additions & 0 deletions Lib/test/test_pydoc/test_pydoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import contextlib
import importlib.util
import inspect
import io
import pydoc
import py_compile
import keyword
Expand Down Expand Up @@ -899,6 +900,82 @@ def test_synopsis(self):
synopsis = pydoc.synopsis(TESTFN, {})
self.assertEqual(synopsis, 'line 1: h\xe9')

def test_source_synopsis(self):
    # Exercise pydoc.source_synopsis() on in-memory sources (str, or bytes
    # decoded with an explicit encoding) and finally on a real file.
    def open_source(source, encoding):
        if isinstance(source, str):
            return StringIO(source)
        return io.TextIOWrapper(io.BytesIO(source), encoding=encoding)

    # Each entry is (source, expected synopsis, encoding).
    cases = [
        ('"""Single line docstring."""',
         'Single line docstring.', None),
        ('"""First line of docstring.\nSecond line.\nThird line."""',
         'First line of docstring.', None),
        ('"""First line of docstring.\\nSecond line.\\nThird line."""',
         'First line of docstring.', None),
        ('""" Whitespace around docstring. """',
         'Whitespace around docstring.', None),
        ('import sys\n"""No docstring"""',
         None, None),
        (' \n"""Docstring after empty line."""',
         'Docstring after empty line.', None),
        ('# Comment\n"""Docstring after comment."""',
         'Docstring after comment.', None),
        (' # Indented comment\n"""Docstring after comment."""',
         'Docstring after comment.', None),
        ('""""""',  # Empty docstring
         '', None),
        ('',  # Empty file
         None, None),
        ('"""Embedded\0null byte"""',
         None, None),
        ('"""Embedded null byte"""\0',
         None, None),
        ('"""Café and résumé."""',
         'Café and résumé.', None),
        ("'''Triple single quotes'''",
         'Triple single quotes', None),
        ('"Single double quotes"',
         'Single double quotes', None),
        ("'Single single quotes'",
         'Single single quotes', None),
        ('"""split\\\nline"""',
         'splitline', None),
        ('"""Unrecognized escape \\sequence"""',
         'Unrecognized escape \\sequence', None),
        ('"""Invalid escape seq\\uence"""',
         None, None),
        ('r"""Raw \\stri\\ng"""',
         'Raw \\stri\\ng', None),
        ('b"""Bytes literal"""',
         None, None),
        ('f"""f-string"""',
         None, None),
        ('"""Concatenated""" \\\n"string" \'literals\'',
         'Concatenatedstringliterals', None),
        ('"""String""" + """expression"""',
         None, None),
        ('("""In parentheses""")',
         'In parentheses', None),
        ('("""Multiple lines """\n"""in parentheses""")',
         'Multiple lines in parentheses', None),
        ('()',  # tuple
         None, None),
        (b'# coding: iso-8859-15\n"""\xa4uro sign"""',
         '€uro sign', 'iso-8859-15'),
        (b'"""\xa4"""',  # Decoding error
         None, 'utf-8'),
    ]
    for source, expected, encoding in cases:
        with open_source(source, encoding) as stream:
            result = pydoc.source_synopsis(stream)
            self.assertEqual(result, expected)

    # The same extraction must work on a file object backed by a real file.
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as real_file:
        real_file.write('"""Real file test."""\n')
        real_file.flush()
        real_file.seek(0)
        result = pydoc.source_synopsis(real_file)
        self.assertEqual(result, "Real file test.")

@requires_docstrings
def test_synopsis_sourceless(self):
os = import_helper.import_fresh_module('os')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.
Loading