Commit 8889bb1

Merge pull request #276 from python/master: Sync Fork from Upstream Repo

2 parents: a92e893 + 4f17c5c

13 files changed: +157 -39 lines

.azure-pipelines/posix-steps.yml

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ steps:
   - script: ./venv/bin/python -m coverage xml
     displayName: 'Generate coverage.xml'

-  - script: source ./venv/bin/activate && bash <(curl -s https://codecov.io/bash)
+  - script: source ./venv/bin/activate && bash <(curl -s https://codecov.io/bash) -y .github/codecov.yml
     displayName: 'Publish code coverage results'


.github/workflows/coverage.yml

Lines changed: 2 additions & 2 deletions

@@ -65,7 +65,7 @@ jobs:
       - name: 'Publish code coverage results'
         run: |
           source ./.venv/bin/activate
-          bash <(curl -s https://codecov.io/bash)
+          bash <(curl -s https://codecov.io/bash) -y .github/codecov.yml
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

@@ -84,6 +84,6 @@ jobs:
         if: always()
         run: |
           make pythoninfo
-          bash <(curl -s https://codecov.io/bash)
+          bash <(curl -s https://codecov.io/bash) -y .github/codecov.yml
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

.travis.yml

Lines changed: 2 additions & 2 deletions

@@ -94,7 +94,7 @@ matrix:
       after_script:  # Probably should be after_success once test suite updated to run under coverage.py.
         # Make the `coverage` command available to Codecov w/ a version of Python that can parse all source files.
         - source ./venv/bin/activate
-        - bash <(curl -s https://codecov.io/bash)
+        - bash <(curl -s https://codecov.io/bash) -y .github/codecov.yml
     - name: "Test code coverage (C)"
       os: linux
       language: c

@@ -111,7 +111,7 @@ matrix:
         - xvfb-run make -j4 coverage-report
       after_script:  # Probably should be after_success once test suite updated to run under coverage.py.
         - make pythoninfo
-        - bash <(curl -s https://codecov.io/bash)
+        - bash <(curl -s https://codecov.io/bash) -y .github/codecov.yml

 before_install:
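
The three CI changes above (Azure Pipelines, GitHub Actions, Travis) are identical: the Codecov bash uploader's `-y` flag points it at a custom configuration file, so coverage settings are now read from `.github/codecov.yml` rather than from a default `codecov.yml` at the repository root.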

Lib/pkgutil.py

Lines changed: 7 additions & 6 deletions

@@ -638,8 +638,8 @@ def get_data(package, resource):
     return loader.get_data(resource_name)


-_DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*'
-_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$', re.I)
+_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
+_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U)
 del _DOTTED_WORDS

 def resolve_name(name):

@@ -677,11 +677,12 @@ def resolve_name(name):
     m = _NAME_PATTERN.match(name)
     if not m:
         raise ValueError(f'invalid format: {name!r}')
-    groups = m.groups()
-    if groups[2]:
+    gd = m.groupdict()
+    if gd.get('cln'):
         # there is a colon - a one-step import is all that's needed
-        mod = importlib.import_module(groups[0])
-        parts = groups[3].split('.') if groups[3] else []
+        mod = importlib.import_module(gd['pkg'])
+        parts = gd.get('obj')
+        parts = parts.split('.') if parts else []
     else:
         # no colon - have to iterate to find the package boundary
         parts = name.split('.')
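
For orientation (not part of the diff): pkgutil.resolve_name accepts either a purely dotted name or a `module:object` form, and the rewritten pattern now admits any Unicode identifier segment while rejecting segments that start with a digit. A minimal usage sketch, with the behaviour inferred from this change and the tests below:

    import pkgutil

    # colon form: a one-step import of 'logging.handlers', then an attribute walk
    h1 = pkgutil.resolve_name('logging.handlers:SysLogHandler')
    # dotted-only form: iterate to find the package boundary
    h2 = pkgutil.resolve_name('logging.handlers.SysLogHandler')
    assert h1 is h2

    # a segment starting with a digit now fails the pattern match,
    # raising ValueError before any import is attempted
    pkgutil.resolve_name('9abc')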

Lib/test/test_pkgutil.py

Lines changed: 32 additions & 0 deletions

@@ -229,8 +229,40 @@ def test_name_resolution(self):
             ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError),
             ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError),
             ('ZeroDivisionError', ImportError),
+            ('os.path.9abc', ValueError),
+            ('9abc', ValueError),
         )

+        # add some Unicode package names to the mix.
+
+        unicode_words = ('\u0935\u092e\u0938',
+                         '\xe9', '\xc8',
+                         '\uc548\ub155\ud558\uc138\uc694',
+                         '\u3055\u3088\u306a\u3089',
+                         '\u3042\u308a\u304c\u3068\u3046',
+                         '\u0425\u043e\u0440\u043e\u0448\u043e',
+                         '\u0441\u043f\u0430\u0441\u0438\u0431\u043e',
+                         '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868')
+
+        for uw in unicode_words:
+            d = os.path.join(self.dirname, uw)
+            os.makedirs(d, exist_ok=True)
+            # make an empty __init__.py file
+            f = os.path.join(d, '__init__.py')
+            with open(f, 'w') as f:
+                f.write('')
+                f.flush()
+            # now import the package we just created; clearing the caches is
+            # needed, otherwise the newly created package isn't found
+            importlib.invalidate_caches()
+            mod = importlib.import_module(uw)
+            success_cases += (uw, mod),
+            if len(uw) > 1:
+                failure_cases += (uw[:-1], ImportError),
+
+        # add an example with a Unicode digit at the start
+        failure_cases += ('\u0966\u0935\u092e\u0938', ValueError),
+
         for s, expected in success_cases:
             with self.subTest(s=s):
                 o = pkgutil.resolve_name(s)
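
As a quick illustration of what the new failure cases exercise (a sketch, with the pattern copied from the Lib/pkgutil.py change above): `\w` makes each segment a full Unicode identifier, while the `(?!\d)` lookahead rejects a leading digit in any script.

    import re

    _DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
    pat = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$')

    assert pat.match('\u0935\u092e\u0938')                # Devanagari letters: accepted
    assert pat.match('\u0966\u0935\u092e\u0938') is None  # leading Devanagari digit: rejected
    assert pat.match('os.path.9abc') is None              # digit after a dot: rejected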
(3 binary files changed; contents not shown)

Modules/_xxtestfuzz/fuzz_tests.txt

Lines changed: 1 addition & 0 deletions

@@ -5,3 +5,4 @@ fuzz_json_loads
 fuzz_sre_compile
 fuzz_sre_match
 fuzz_csv_reader
+fuzz_struct_unpack

Modules/_xxtestfuzz/fuzzer.c

Lines changed: 75 additions & 1 deletion

@@ -79,6 +79,69 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
     return 0;
 }

+
+PyObject* struct_unpack_method = NULL;
+PyObject* struct_error = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_struct_unpack() {
+    /* Import struct.unpack */
+    PyObject* struct_module = PyImport_ImportModule("struct");
+    if (struct_module == NULL) {
+        return 0;
+    }
+    struct_error = PyObject_GetAttrString(struct_module, "error");
+    if (struct_error == NULL) {
+        return 0;
+    }
+    struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack");
+    return struct_unpack_method != NULL;
+}
+/* Fuzz struct.unpack(x, y) */
+static int fuzz_struct_unpack(const char* data, size_t size) {
+    /* Everything up to the first null byte is considered the
+       format. Everything after is the buffer */
+    const char* first_null = memchr(data, '\0', size);
+    if (first_null == NULL) {
+        return 0;
+    }
+
+    size_t format_length = first_null - data;
+    size_t buffer_length = size - format_length - 1;
+
+    PyObject* pattern = PyBytes_FromStringAndSize(data, format_length);
+    if (pattern == NULL) {
+        return 0;
+    }
+    PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length);
+    if (buffer == NULL) {
+        Py_DECREF(pattern);
+        return 0;
+    }
+
+    PyObject* unpacked = PyObject_CallFunctionObjArgs(
+        struct_unpack_method, pattern, buffer, NULL);
+    /* Ignore any overflow errors, these are easily triggered accidentally */
+    if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+        PyErr_Clear();
+    }
+    /* The pascal format string will throw a negative size when passing 0
+       like: struct.unpack('0p', b'') */
+    if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
+        PyErr_Clear();
+    }
+    /* Ignore any struct.error exceptions, these can be caused by invalid
+       formats or incomplete buffers both of which are common. */
+    if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
+        PyErr_Clear();
+    }
+
+    Py_XDECREF(unpacked);
+    Py_DECREF(pattern);
+    Py_DECREF(buffer);
+    return 0;
+}
+
+
 #define MAX_JSON_TEST_SIZE 0x10000

 PyObject* json_loads_method = NULL;

@@ -190,9 +253,10 @@ static int fuzz_sre_compile(const char* data, size_t size) {
         PyErr_Clear();
     }
     /* Ignore some common errors thrown by sre_parse:
-       Overflow, Assertion and Index */
+       Overflow, Assertion, Recursion and Index */
     if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
                              PyErr_ExceptionMatches(PyExc_AssertionError) ||
+                             PyErr_ExceptionMatches(PyExc_RecursionError) ||
                              PyErr_ExceptionMatches(PyExc_IndexError))
     ) {
         PyErr_Clear();

@@ -378,6 +442,16 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
     rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
 #endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack)
+    static int STRUCT_UNPACK_INITIALIZED = 0;
+    if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) {
+        PyErr_Print();
+        abort();
+    } else {
+        STRUCT_UNPACK_INITIALIZED = 1;
+    }
+    rv |= _run_fuzz(data, size, fuzz_struct_unpack);
+#endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
     static int JSON_LOADS_INITIALIZED = 0;
     if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
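
For readers who want to poke at the harness logic without libFuzzer, here is a minimal Python sketch of the same input convention (the function name mirrors the C one but is otherwise illustrative): bytes before the first NUL are the format, the rest is the buffer, and the exception classes the C code clears are swallowed.

    import struct

    def fuzz_struct_unpack(data: bytes) -> None:
        # everything up to the first null byte is the format,
        # everything after it is the buffer (like the C harness)
        fmt, sep, buf = data.partition(b'\0')
        if not sep:     # no null byte at all: nothing to do
            return
        try:
            struct.unpack(fmt, buf)
        except (struct.error, OverflowError):
            # invalid formats and short buffers are common and uninteresting;
            # the C harness additionally tolerates the SystemError raised by
            # the '0p' pascal-format corner case
            pass

    fuzz_struct_unpack(b'<HH\0\x01\x02\x03\x04')   # two little-endian unsigned shorts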

Parser/parsetok.c

Lines changed: 2 additions & 2 deletions

@@ -240,7 +240,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 #endif

     for (;;) {
-        char *a, *b;
+        const char *a, *b;
         int type;
         size_t len;
         char *str;

@@ -371,7 +371,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
            buffer after parsing.  Trailing whitespace and comments
            are OK. */
         if (err_ret->error == E_DONE && start == single_input) {
-            char *cur = tok->cur;
+            const char *cur = tok->cur;
             char c = *tok->cur;

             for (;;) {

Parser/tokenizer.c

Lines changed: 30 additions & 20 deletions

@@ -59,7 +59,9 @@ tok_new(void)
                                             sizeof(struct tok_state));
     if (tok == NULL)
         return NULL;
-    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+    tok->buf = tok->cur = tok->inp = NULL;
+    tok->start = NULL;
+    tok->end = NULL;
     tok->done = E_OK;
     tok->fp = NULL;
     tok->input = NULL;

@@ -111,7 +113,9 @@ error_ret(struct tok_state *tok) /* XXX */
     tok->decoding_erred = 1;
     if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
         PyMem_FREE(tok->buf);
-    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+    tok->buf = tok->cur = tok->inp = NULL;
+    tok->start = NULL;
+    tok->end = NULL;
     tok->done = E_DECODE;
     return NULL;                /* as if it were EOF */
 }

@@ -664,11 +668,11 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
    Look for encoding declarations inside STR, and record them
    inside TOK. */

-static const char *
+static char *
 decode_str(const char *input, int single, struct tok_state *tok)
 {
     PyObject* utf8 = NULL;
-    const char *str;
+    char *str;
     const char *s;
     const char *newl[2] = {NULL, NULL};
     int lineno = 0;

@@ -726,43 +730,46 @@ struct tok_state *
 PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
+    char *decoded;
+
     if (tok == NULL)
         return NULL;
-    str = decode_str(str, exec_input, tok);
-    if (str == NULL) {
+    decoded = decode_str(str, exec_input, tok);
+    if (decoded == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
     }

-    /* XXX: constify members. */
-    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+    tok->buf = tok->cur = tok->inp = decoded;
+    tok->end = decoded;
     return tok;
 }

 struct tok_state *
 PyTokenizer_FromUTF8(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
+    char *translated;
     if (tok == NULL)
         return NULL;
-    tok->input = str = translate_newlines(str, exec_input, tok);
-    if (str == NULL) {
+    tok->input = translated = translate_newlines(str, exec_input, tok);
+    if (translated == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
     }
     tok->decoding_state = STATE_RAW;
     tok->read_coding_spec = 1;
     tok->enc = NULL;
-    tok->str = str;
+    tok->str = translated;
     tok->encoding = (char *)PyMem_MALLOC(6);
     if (!tok->encoding) {
         PyTokenizer_Free(tok);
         return NULL;
     }
     strcpy(tok->encoding, "utf-8");

-    /* XXX: constify members. */
-    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+    tok->buf = tok->cur = tok->inp = translated;
+    tok->end = translated;
     return tok;
 }

@@ -812,7 +819,7 @@ PyTokenizer_Free(struct tok_state *tok)
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
     if (tok->input)
-        PyMem_FREE((char *)tok->input);
+        PyMem_FREE(tok->input);
     PyMem_FREE(tok);
 }

@@ -1138,7 +1145,7 @@ tok_decimal_tail(struct tok_state *tok)

 /* Get next token, after space stripping etc. */

 static int
-tok_get(struct tok_state *tok, char **p_start, char **p_end)
+tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 {
     int c;
     int blankline, nonascii;

@@ -1321,7 +1328,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                    && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));

                 if (is_type_ignore) {
-                    *p_start = (char *) ignore_end;
+                    *p_start = ignore_end;
                     *p_end = tok->cur;

                     /* If this type ignore is the only thing on the line, consume the newline also. */

@@ -1331,7 +1338,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                    }
                     return TYPE_IGNORE;
                 } else {
-                    *p_start = (char *) type_start;  /* after type_comment_prefix */
+                    *p_start = type_start;  /* after type_comment_prefix */
                     *p_end = tok->cur;
                     return TYPE_COMMENT;
                 }

@@ -1410,7 +1417,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
        Look ahead one token to see if that is 'def'. */

     struct tok_state ahead_tok;
-    char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
+    const char *ahead_tok_start = NULL;
+    const char *ahead_tok_end = NULL;
     int ahead_tok_kind;

     memcpy(&ahead_tok, tok, sizeof(ahead_tok));

@@ -1798,7 +1806,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
 }

 int
-PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
+PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
 {
     int result = tok_get(tok, p_start, p_end);
     if (tok->decoding_erred) {

@@ -1823,7 +1831,9 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
 {
     struct tok_state *tok;
     FILE *fp;
-    char *p_start =NULL , *p_end =NULL , *encoding = NULL;
+    const char *p_start = NULL;
+    const char *p_end = NULL;
+    char *encoding = NULL;

     fd = _Py_dup(fd);
     if (fd < 0) {
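
Taken together, the Parser/parsetok.c and Parser/tokenizer.c changes are one const-correctness cleanup: the `p_start`/`p_end` out-parameters of `tok_get`, `PyTokenizer_Get` and their callers become `const char **`, `decode_str` returns a mutable buffer so the `(char *)` casts can go, and `tok->start`/`tok->end` are assigned separately from the mutable pointers, presumably so they can be const-qualified in the struct (the struct definition itself is not part of this diff). The two old `/* XXX: constify members. */` comments are thereby resolved.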
