@@ -1,16 +1,15 @@
 #
 # Copyright (c) 2010 Doug Hellmann. All rights reserved.
 #
-"""Spelling checker extension for Sphinx.
-"""
+"""Spelling checker extension for Sphinx."""

 # TODO - Words with multiple uppercase letters treated as classes and ignored

 import builtins
 import importlib
 import subprocess
 import sys
-from xmlrpc import client as xmlrpc_client

+import requests
 from enchant.tokenize import Filter, get_tokenizer, tokenize, unit_tokenize
 from sphinx.util import logging
@@ -22,18 +21,19 @@ class AcronymFilter(Filter):
     """If a word looks like an acronym (all upper case letters),
     ignore it.
     """
+
     def _skip(self, word):
         return (
-            word.isupper() or  # all caps
+            word.isupper()  # all caps
+            or
             # pluralized acronym ("URLs")
-            (word[-1].lower() == 's' and word[:-1].isupper())
+            (word[-1].lower() == "s" and word[:-1].isupper())
         )


 class list_tokenize(tokenize):
-
     def __init__(self, words):
-        super().__init__('')
+        super().__init__("")
         self._words = words

     def next(self):
@@ -44,8 +44,8 @@ def next(self):


 class ContractionFilter(Filter):
-    """Strip common contractions from words.
-    """
+    """Strip common contractions from words."""
+
     splits = {
         "aren't": ["are", "not"],
         "can't": ["can", "not"],
@@ -138,8 +138,7 @@ def _split(self, word):


 class IgnoreWordsFilter(Filter):
-    """Given a set of words, ignore them all.
-    """
+    """Given a set of words, ignore them all."""

     def __init__(self, tokenizer, word_set):
         self.word_set = set(word_set)
@@ -150,7 +149,6 @@ def _skip(self, word):


 class IgnoreWordsFilterFactory:
-
     def __init__(self, words):
         self.words = words

@@ -159,23 +157,31 @@ def __call__(self, tokenizer):


 class PyPIFilterFactory(IgnoreWordsFilterFactory):
-    """Build an IgnoreWordsFilter for all of the names of packages on PyPI.
-    """
+    """Build an IgnoreWordsFilter for all of the names of packages on PyPI."""
+
     def __init__(self):
-        client = xmlrpc_client.ServerProxy('https://pypi.python.org/pypi')
-        super().__init__(client.list_packages())
+        r = requests.get(
+            "https://pypi.org/simple/",
+            headers={
+                "user-agent": "sphinxcontrib.spelling",
+                "accept": "application/vnd.pypi.simple.v1+json",
+            },
+        )
+        names = [i["name"] for i in r.json()["projects"]]
+        logger.debug("retrieved %d project names from pypi.org", len(names))
+        super().__init__(names)


 class PythonBuiltinsFilter(Filter):
-    """Ignore names of built-in Python symbols.
-    """
+    """Ignore names of built-in Python symbols."""
+
     def _skip(self, word):
         return hasattr(builtins, word)


 class ImportableModuleFilter(Filter):
-    """Ignore names of modules that we could import.
-    """
+    """Ignore names of modules that we could import."""
+
     def __init__(self, tokenizer):
         super().__init__(tokenizer)
         self.found_modules = set(sys.builtin_module_names)
@@ -185,7 +191,7 @@ def __init__(self, tokenizer):
         # valid module, which is consistent with the behavior before
         # version 7.3.1. See
         # https://github.com/sphinx-contrib/spelling/issues/141
-        self.sought_modules.add('__main__')
+        self.sought_modules.add("__main__")

     def _skip(self, word):
         # If the word looks like a python module filename, strip the
@@ -195,13 +201,13 @@ def _skip(self, word):
         # it look like Sphinx is complaining about a commandline
         # argument. See
         # https://github.com/sphinx-contrib/spelling/issues/142
-        if word.endswith('.py'):
+        if word.endswith(".py"):
             logger.debug(
-                'removing .py extension from %r before searching for module',
-                word)
+                "removing .py extension from %r before searching for module", word
+            )
             word = word[:-3]

-        valid_module_name = all(n.isidentifier() for n in word.split('.'))
+        valid_module_name = all(n.isidentifier() for n in word.split("."))
         if not valid_module_name:
             return False

@@ -214,8 +220,7 @@ def _skip(self, word):
                 # error out of distutils, or something else triggered
                 # by failing to be able to import a parent package to
                 # use the metadata to search for a subpackage.
-                logger.debug('find_spec(%r) failed, invalid module name: %s',
-                             word, err)
+                logger.debug("find_spec(%r) failed, invalid module name: %s", word, err)
             else:
                 if mod is not None:
                     self.found_modules.add(word)
@@ -230,25 +235,28 @@ class ContributorFilter(IgnoreWordsFilter):
     tokens that are in the set.
     """

-    _pretty_format = (
-        '%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn'
-    )
+    _pretty_format = "%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn"

     def __init__(self, tokenizer):
         contributors = self._get_contributors()
         super().__init__(tokenizer, contributors)

     def _get_contributors(self):
-        logger.info('Scanning contributors')
-        cmd = ['git', 'log', '--quiet', '--no-color',
-               f'--pretty=format:{self._pretty_format}']
+        logger.info("Scanning contributors")
+        cmd = [
+            "git",
+            "log",
+            "--quiet",
+            "--no-color",
+            f"--pretty=format:{self._pretty_format}",
+        ]

         try:
             p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
         except (subprocess.CalledProcessError, FileNotFoundError) as err:
-            logger.warning('Called: %s', ' '.join(cmd))
-            logger.warning('Failed to scan contributors: %s', err)
+            logger.warning("Called: %s", " ".join(cmd))
+            logger.warning("Failed to scan contributors: %s", err)
             return set()
-        output = p.stdout.decode('utf-8')
-        tokenizer = get_tokenizer('en_US', filters=[])
+        output = p.stdout.decode("utf-8")
+        tokenizer = get_tokenizer("en_US", filters=[])
         return {word for word, pos in tokenizer(output)}
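
Note on the PyPIFilterFactory change above: PyPI has deprecated the XML-RPC API that provided list_packages(), so the filter now reads the Simple index in its JSON rendering (PEP 691). A minimal standalone sketch of the new lookup, assuming only that requests is installed; the helper name fetch_pypi_project_names is illustrative, not part of the module:

    import requests

    def fetch_pypi_project_names():
        # Same endpoint and headers as PyPIFilterFactory.__init__ above.
        resp = requests.get(
            "https://pypi.org/simple/",
            headers={
                "user-agent": "sphinxcontrib.spelling",
                # Ask for the JSON form of the Simple index (PEP 691)
                # instead of the default HTML listing.
                "accept": "application/vnd.pypi.simple.v1+json",
            },
        )
        resp.raise_for_status()  # the diff itself does no error handling
        return [project["name"] for project in resp.json()["projects"]]

    if __name__ == "__main__":
        print(len(fetch_pypi_project_names()), "project names retrieved")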
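
Similarly, a sketch of the git invocation that ContributorFilter._get_contributors wraps, for reproducing it outside Sphinx. It assumes a git checkout and shows only the subprocess half, leaving out the enchant tokenizer pass that turns the output into individual ignorable words:

    import subprocess

    # One line per Co-Authored-By trailer, author name, and committer name
    # for every commit (%x0A is a newline in git's pretty-format syntax).
    fmt = "%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn"
    cmd = ["git", "log", "--quiet", "--no-color", f"--pretty=format:{fmt}"]
    p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
    output = p.stdout.decode("utf-8")
    print(sorted(set(output.splitlines()))[:10])  # sample of raw contributor lines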