Skip to content

Commit a29f858

Browse files
author
Ma Lin
authored
bpo-47256: Increasing the depth of backtracking in RE (GH-32411)
Limit the maximum number of capturing groups to 2**30-1 on 64-bit platforms (it was 2**31-1). No change on 32-bit platforms (2**28-1). This allows the size of SRE(match_context) to be reduced: - On 32-bit platforms: 36 bytes, no change. (msvc2022) - On 64-bit platforms: 72 bytes -> 56 bytes. (msvc2022/gcc9.4) The smaller context in turn increases the depth of backtracking.
1 parent 1c2fceb commit a29f858

File tree

3 files changed

+46
-44
lines changed

3 files changed

+46
-44
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:mod:`re` module: limit the maximum number of capturing groups to 1,073,741,823 in
2+
64-bit builds; this increases the depth of backtracking.

Modules/_sre/sre.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
#define SRE_CODE Py_UCS4
1919
#if SIZEOF_SIZE_T > 4
2020
# define SRE_MAXREPEAT (~(SRE_CODE)0)
21-
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
21+
# define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2)
2222
#else
2323
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
24-
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
24+
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2)
2525
#endif
2626

2727
typedef struct {
@@ -73,12 +73,12 @@ typedef struct {
7373
Py_ssize_t pos, endpos;
7474
int isbytes;
7575
int charsize; /* character size */
76-
/* registers */
77-
Py_ssize_t lastindex;
78-
Py_ssize_t lastmark;
79-
const void** mark;
8076
int match_all;
8177
int must_advance;
78+
/* marks */
79+
int lastmark;
80+
int lastindex;
81+
const void** mark;
8282
/* dynamically allocated stuff */
8383
char* data_stack;
8484
size_t data_stack_size;

Modules/_sre/sre_lib.h

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -450,20 +450,23 @@ do { \
450450

451451
#define MARK_PUSH(lastmark) \
452452
do if (lastmark >= 0) { \
453-
i = lastmark; /* ctx->lastmark may change if reallocated */ \
454-
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
453+
size_t _marks_size = (lastmark+1) * sizeof(void*); \
454+
DATA_STACK_PUSH(state, state->mark, _marks_size); \
455455
} while (0)
456456
#define MARK_POP(lastmark) \
457457
do if (lastmark >= 0) { \
458-
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
458+
size_t _marks_size = (lastmark+1) * sizeof(void*); \
459+
DATA_STACK_POP(state, state->mark, _marks_size, 1); \
459460
} while (0)
460461
#define MARK_POP_KEEP(lastmark) \
461462
do if (lastmark >= 0) { \
462-
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
463+
size_t _marks_size = (lastmark+1) * sizeof(void*); \
464+
DATA_STACK_POP(state, state->mark, _marks_size, 0); \
463465
} while (0)
464466
#define MARK_POP_DISCARD(lastmark) \
465467
do if (lastmark >= 0) { \
466-
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
468+
size_t _marks_size = (lastmark+1) * sizeof(void*); \
469+
DATA_STACK_POP_DISCARD(state, _marks_size); \
467470
} while (0)
468471

469472
#define JUMP_NONE 0
@@ -488,10 +491,10 @@ do { \
488491
ctx->pattern = pattern; \
489492
ctx->ptr = ptr; \
490493
DATA_ALLOC(SRE(match_context), nextctx); \
491-
nextctx->last_ctx_pos = ctx_pos; \
492-
nextctx->jump = jumpvalue; \
493494
nextctx->pattern = nextpattern; \
494495
nextctx->toplevel = toplevel_; \
496+
nextctx->jump = jumpvalue; \
497+
nextctx->last_ctx_pos = ctx_pos; \
495498
pattern = nextpattern; \
496499
ctx_pos = alloc_pos; \
497500
ctx = nextctx; \
@@ -507,18 +510,18 @@ do { \
507510
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
508511

509512
typedef struct {
510-
Py_ssize_t last_ctx_pos;
511-
Py_ssize_t jump;
512-
const SRE_CHAR* ptr;
513-
const SRE_CODE* pattern;
514513
Py_ssize_t count;
515-
Py_ssize_t lastmark;
516-
Py_ssize_t lastindex;
517514
union {
518515
SRE_CODE chr;
519516
SRE_REPEAT* rep;
520517
} u;
518+
int lastmark;
519+
int lastindex;
520+
const SRE_CODE* pattern;
521+
const SRE_CHAR* ptr;
521522
int toplevel;
523+
int jump;
524+
Py_ssize_t last_ctx_pos;
522525
} SRE(match_context);
523526

524527
#define MAYBE_CHECK_SIGNALS \
@@ -558,8 +561,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
558561
{
559562
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
560563
Py_ssize_t alloc_pos, ctx_pos = -1;
561-
Py_ssize_t i, ret = 0;
562-
Py_ssize_t jump;
564+
Py_ssize_t ret = 0;
565+
int jump;
563566
unsigned int sigcount=0;
564567

565568
SRE(match_context)* ctx;
@@ -607,20 +610,22 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
607610
/* <MARK> <gid> */
608611
TRACE(("|%p|%p|MARK %d\n", pattern,
609612
ptr, pattern[0]));
610-
i = pattern[0];
611-
if (i & 1)
612-
state->lastindex = i/2 + 1;
613-
if (i > state->lastmark) {
614-
/* state->lastmark is the highest valid index in the
615-
state->mark array. If it is increased by more than 1,
616-
the intervening marks must be set to NULL to signal
617-
that these marks have not been encountered. */
618-
Py_ssize_t j = state->lastmark + 1;
619-
while (j < i)
620-
state->mark[j++] = NULL;
621-
state->lastmark = i;
613+
{
614+
int i = pattern[0];
615+
if (i & 1)
616+
state->lastindex = i/2 + 1;
617+
if (i > state->lastmark) {
618+
/* state->lastmark is the highest valid index in the
619+
state->mark array. If it is increased by more than 1,
620+
the intervening marks must be set to NULL to signal
621+
that these marks have not been encountered. */
622+
int j = state->lastmark + 1;
623+
while (j < i)
624+
state->mark[j++] = NULL;
625+
state->lastmark = i;
626+
}
627+
state->mark[i] = ptr;
622628
}
623-
state->mark[i] = ptr;
624629
pattern++;
625630
DISPATCH;
626631

@@ -1373,9 +1378,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
13731378
/* match backreference */
13741379
TRACE(("|%p|%p|GROUPREF %d\n", pattern,
13751380
ptr, pattern[0]));
1376-
i = pattern[0];
13771381
{
1378-
Py_ssize_t groupref = i+i;
1382+
int groupref = pattern[0] * 2;
13791383
if (groupref >= state->lastmark) {
13801384
RETURN_FAILURE;
13811385
} else {
@@ -1398,9 +1402,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
13981402
/* match backreference */
13991403
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
14001404
ptr, pattern[0]));
1401-
i = pattern[0];
14021405
{
1403-
Py_ssize_t groupref = i+i;
1406+
int groupref = pattern[0] * 2;
14041407
if (groupref >= state->lastmark) {
14051408
RETURN_FAILURE;
14061409
} else {
@@ -1424,9 +1427,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
14241427
/* match backreference */
14251428
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
14261429
ptr, pattern[0]));
1427-
i = pattern[0];
14281430
{
1429-
Py_ssize_t groupref = i+i;
1431+
int groupref = pattern[0] * 2;
14301432
if (groupref >= state->lastmark) {
14311433
RETURN_FAILURE;
14321434
} else {
@@ -1450,9 +1452,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
14501452
/* match backreference */
14511453
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
14521454
ptr, pattern[0]));
1453-
i = pattern[0];
14541455
{
1455-
Py_ssize_t groupref = i+i;
1456+
int groupref = pattern[0] * 2;
14561457
if (groupref >= state->lastmark) {
14571458
RETURN_FAILURE;
14581459
} else {
@@ -1476,9 +1477,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
14761477
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
14771478
ptr, pattern[0]));
14781479
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1479-
i = pattern[0];
14801480
{
1481-
Py_ssize_t groupref = i+i;
1481+
int groupref = pattern[0] * 2;
14821482
if (groupref >= state->lastmark) {
14831483
pattern += pattern[1];
14841484
DISPATCH;

0 commit comments

Comments
 (0)