Skip to content

Commit b63e7dc

Browse files
committed
Refactor to use for-loops, single cache-line table
1 parent e616bee commit b63e7dc

File tree

3 files changed

+60
-65
lines changed

3 files changed

+60
-65
lines changed

Lib/test/string_tests.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -339,8 +339,8 @@ def reference_find(p, s):
339339
text, 'find', p)
340340

341341
def test_find_shift_table_overflow(self):
342-
"""When the table of 16-bit shifts overflows."""
343-
N = 2**16 + 100 # Overflow the 16-bit shift table
342+
"""When the table of 8-bit shifts overflows."""
343+
N = 2**8 + 100
344344

345345
# first check the periodic case
346346
# here, the shift for 'b' is N + 1.

Objects/stringlib/fastsearch.h

Lines changed: 51 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
189189
Py_ssize_t period = 1;
190190

191191
while (candidate + k < len_needle) {
192-
// loop increases candidate + k by 1 at each step
192+
// each loop iteration increases candidate + k + max_suffix
193193
STRINGLIB_CHAR a = needle[candidate + k];
194194
STRINGLIB_CHAR b = needle[max_suffix + k];
195195
// check if the suffix at candidate is better than max_suffix
@@ -286,11 +286,11 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
286286
return cut;
287287
}
288288

289-
#define SHIFT_TYPE uint16_t
289+
#define SHIFT_TYPE uint8_t
290290
#define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
291291
#define SHIFT_OVERFLOW (NOT_FOUND - 1U)
292292

293-
#define TABLE_SIZE_BITS 7
293+
#define TABLE_SIZE_BITS 6
294294
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
295295
#define TABLE_MASK (TABLE_SIZE - 1U)
296296

@@ -315,8 +315,14 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
315315
p->is_periodic = (0 == memcmp(needle,
316316
needle + p->period,
317317
p->cut * STRINGLIB_SIZEOF_CHAR));
318-
assert(!p->is_periodic || (p->cut <= len_needle/2
319-
&& p->cut < p->period));
318+
if (p->is_periodic) {
319+
assert(p->cut <= len_needle/2);
320+
assert(p->cut < p->period);
321+
}
322+
else {
323+
// A lower bound on the period
324+
p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
325+
}
320326
// Now fill up a table
321327
memset(&(p->table[0]), 0xff, TABLE_SIZE*sizeof(SHIFT_TYPE));
322328
assert(p->table[0] == NOT_FOUND);
@@ -344,11 +350,13 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
344350
const STRINGLIB_CHAR *needle = p->needle;
345351
const STRINGLIB_CHAR *window = haystack;
346352
const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle;
353+
SHIFT_TYPE *table = p->table;
347354
LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
348355

349356
if (p->is_periodic) {
350357
LOG("Needle is periodic.\n");
351358
Py_ssize_t memory = 0;
359+
periodicwindowloop:
352360
while (window <= last_window) {
353361
Py_ssize_t i = Py_MAX(cut, memory);
354362

@@ -364,7 +372,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
364372
// as well jump to line up the character *after* the
365373
// current window.
366374
STRINGLIB_CHAR first_outside = window[len_needle];
367-
SHIFT_TYPE shift = p->table[first_outside & TABLE_MASK];
375+
SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
368376
if (shift == NOT_FOUND) {
369377
LOG("\"%c\" not found. Skipping entirely.\n",
370378
first_outside);
@@ -376,42 +384,36 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
376384
window += Py_MAX(shift, memory_shift);
377385
}
378386
memory = 0;
379-
continue;
387+
goto periodicwindowloop;
380388
}
381-
382-
i++;
383-
while (i < len_needle && needle[i] == window[i]) {
384-
i++;
385-
}
386-
if (i >= len_needle) {
387-
LOG("Right half matches.\n");
388-
i = cut - 1;
389-
while (i >= memory && needle[i] == window[i]) {
390-
i--;
389+
for (i = i + 1; i < len_needle; i++) {
390+
if (needle[i] != window[i]) {
391+
LOG("Right half does not match. Jump ahead by %d.\n",
392+
i - cut + 1);
393+
window += i - cut + 1;
394+
memory = 0;
395+
goto periodicwindowloop;
391396
}
392-
if (i < memory) {
393-
LOG("Left half matches. Returning %d.\n",
394-
window - haystack);
395-
return window - haystack;
396-
}
397-
LOG("Left half does not match. Jump ahead by period %d.\n",
398-
period);
399-
window += period;
400-
memory = len_needle - period;
401397
}
402-
else {
403-
LOG("Right half does not match. Jump ahead by %d.\n",
404-
i - cut + 1);
405-
window += i - cut + 1;
406-
memory = 0;
398+
for (i = memory; i < cut; i++) {
399+
if (needle[i] != window[i]) {
400+
LOG("Left half does not match. Jump ahead by period %d.\n",
401+
period);
402+
window += period;
403+
memory = len_needle - period;
404+
goto periodicwindowloop;
405+
}
407406
}
407+
LOG("Left half matches. Returning %d.\n",
408+
window - haystack);
409+
return window - haystack;
408410
}
409411
}
410412
else {
411-
period = Py_MAX(cut, len_needle - cut) + 1;
412413
LOG("Needle is not periodic.\n");
413414
assert(cut < len_needle);
414415
STRINGLIB_CHAR needle_cut = needle[cut];
416+
windowloop:
415417
while (window <= last_window) {
416418

417419
// Visualize the line-up:
@@ -426,7 +428,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
426428
// as well jump to line up the character *after* the
427429
// current window.
428430
STRINGLIB_CHAR first_outside = window[len_needle];
429-
SHIFT_TYPE shift = p->table[first_outside & TABLE_MASK];
431+
SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
430432
if (shift == NOT_FOUND) {
431433
LOG("\"%c\" not found. Skipping entirely.\n",
432434
first_outside);
@@ -436,33 +438,26 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
436438
LOG("Shifting to line up \"%c\".\n", first_outside);
437439
window += shift;
438440
}
439-
continue;
440-
}
441-
442-
Py_ssize_t i = cut + 1;
443-
while (i < len_needle && needle[i] == window[i]) {
444-
i++;
441+
goto windowloop;
445442
}
446-
if (i >= len_needle) {
447-
LOG("Right half matches.\n");
448-
i = cut - 1;
449-
while (i >= 0 && needle[i] == window[i]) {
450-
i--;
443+
for (Py_ssize_t i = cut + 1; i < len_needle; i++) {
444+
if (needle[i] != window[i]) {
445+
LOG("Right half does not match. Advance by %d.\n",
446+
i - cut + 1);
447+
window += i - cut + 1;
448+
goto windowloop;
451449
}
452-
if (i < 0){
453-
LOG("Left half matches. Returning %d.\n",
454-
window - haystack);
455-
return window - haystack;
456-
}
457-
LOG("Left half does not match. Advance by period %d.\n",
458-
period);
459-
window += period;
460450
}
461-
else {
462-
LOG("Right half does not match. Advance by %d.\n",
463-
i - cut + 1);
464-
window += i - cut + 1;
451+
for (Py_ssize_t i = 0; i < cut; i++) {
452+
if (needle[i] != window[i]) {
453+
LOG("Left half does not match. Advance by period %d.\n",
454+
period);
455+
window += period;
456+
goto windowloop;
457+
}
465458
}
459+
LOG("Left half matches. Returning %d.\n", window - haystack);
460+
return window - haystack;
466461
}
467462
}
468463
LOG("Not found. Returning -1.\n");

Objects/stringlib/stringlib_find_two_way_notes.txt

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,7 @@ have linear time.
156156

157157
The sets of inequalities listed so far seem too good to be true in
158158
the general case. Indeed, they fail when a needle is periodic:
159-
there's no way to split 'AAbAAbAAbA' in two such that
159+
there's no way to split 'AAbAAbAAbA' in two such that
160160

161161
(the stuff n characters to the left of the split)
162162
cannot equal
@@ -166,8 +166,8 @@ there's no way to split 'AAbAAbAAbA' in two such that
166166
This is because no matter how you cut it, you'll get
167167
s[cut-3:cut] == s[cut:cut+3]. So what do we do? We still cut the
168168
needle in two so that n can be as big as possible. If we were to
169-
split it as
170-
169+
split it as
170+
171171
AAbA + AbAAbA
172172

173173
then A == A at the split, so this is bad (we failed at length 1), but
@@ -194,9 +194,9 @@ require that k go up to max(len(left_part), len(right_part)).
194194
So long as the period exceeds that, we're good.
195195

196196
The more general shorter-period case is a bit harder. The essentials
197-
are the same, except we use the periodicity to our advantage by
197+
are the same, except we use the periodicity to our advantage by
198198
"remembering" periods that we've already compared. In our running
199-
example, say we're computing
199+
example, say we're computing
200200

201201
"AAbAAbAAbA" in "bbbAbbAAbAAbAAbbbAAbAAbAAbAA".
202202

@@ -233,7 +233,7 @@ We cut as AA + bAAbAAbA, and then the algorithm runs as follows:
233233

234234
bbbAbbAAbAAbAAbbbAAbAAbAAbAA
235235
AAbAAbAAbA
236-
~ A != b at the cut
236+
~ A != b at the cut
237237
bbbAbbAAbAAbAAbbbAAbAAbAAbAA
238238
AAbAAbAAbA
239239
~~ AA != bA at the cut
@@ -421,7 +421,7 @@ more tricks for speed in fastsearch.h:
421421
For this reason, if the needle and haystack are long enough,
422422
only automatically start with two-way if the needle's length
423423
is a small percentage of the length of the haystack.
424-
424+
425425
3. In cases where the needle and haystack are large but the needle
426426
makes up a significant percentage of the length of the
427427
haystack, don't pay the expensive two-way preprocessing cost

0 commit comments

Comments
 (0)