Skip to content

Commit f469766

Browse files
committed
code cleanups, increment a 'window' pointer rather than an index
1 parent ed56aa0 commit f469766

File tree

1 file changed

+51
-39
lines changed

1 file changed

+51
-39
lines changed

Objects/stringlib/fastsearch.h

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,6 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
287287
return cut;
288288
}
289289

290-
#define USE_TABLE
291290
#define SHIFT_TYPE uint16_t
292291
#define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
293292
#define SHIFT_OVERFLOW (NOT_FOUND - 1U)
@@ -326,7 +325,8 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
326325
shift = SHIFT_OVERFLOW;
327326
}
328327
p->table[needle[i] & TABLE_MASK] = Py_SAFE_DOWNCAST(shift,
329-
Py_ssize_t, SHIFT_TYPE);
328+
Py_ssize_t,
329+
SHIFT_TYPE);
330330
}
331331
}
332332

@@ -336,116 +336,128 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
336336
{
337337
// Crochemore and Perrin's (1991) Two-Way algorithm.
338338
// See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
339-
const STRINGLIB_CHAR *needle = p->needle;
340339
Py_ssize_t len_needle = p->len_needle;
341340
Py_ssize_t cut = p->cut;
342341
Py_ssize_t period = p->period;
343-
LOG("===== Checking \"%s\" in \"%s\". =====\n", needle, haystack);
342+
const STRINGLIB_CHAR *needle = p->needle;
343+
const STRINGLIB_CHAR *window = haystack;
344+
const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle;
345+
LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
344346

345347
if (p->is_periodic) {
346348
LOG("Needle is periodic.\n");
347-
Py_ssize_t j = 0;
348349
Py_ssize_t memory = 0;
349-
while (j <= len_haystack - len_needle) {
350+
while (window <= last_window) {
350351
Py_ssize_t i = Py_MAX(cut, memory);
351352

352353
// Visualize the line-up:
353354
LOG("> "); LOG_STRING(haystack, len_haystack);
354-
LOG("\n> "); LOG("%*s", j, ""); LOG_STRING(needle, len_needle);
355-
LOG("\n> "); LOG("%*s", j + i, ""); LOG(" ^ <-- cut\n");
355+
LOG("\n> "); LOG("%*s", window - haystack, "");
356+
LOG_STRING(needle, len_needle);
357+
LOG("\n> "); LOG("%*s", window - haystack + i, "");
358+
LOG(" ^ <-- cut\n");
356359

357-
if (haystack[j + i] != needle[i++]) {
360+
if (window[i] != needle[i++]) {
358361
// Sunday's trick: if we're going to jump, we might
359362
// as well jump to line up the character *after* the
360363
// current window.
361-
STRINGLIB_CHAR first_outside = haystack[j + len_needle];
364+
STRINGLIB_CHAR first_outside = window[len_needle];
362365
SHIFT_TYPE shift = p->table[first_outside & TABLE_MASK];
363366
if (shift == NOT_FOUND) {
364-
LOG("\"%c\" not found. Skipping entirely.\n", first_outside);
365-
j += len_needle + 1;
367+
LOG("\"%c\" not found. Skipping entirely.\n",
368+
first_outside);
369+
window += len_needle + 1;
366370
}
367371
else {
368372
LOG("Shifting to line up \"%c\".\n", first_outside);
369-
j += shift;
373+
window += shift;
370374
}
371375
memory = 0;
372376
continue;
373377
}
374378

375-
while (i < len_needle && needle[i] == haystack[j + i]) {
379+
while (i < len_needle && needle[i] == window[i]) {
376380
i++;
377381
}
378382
if (i >= len_needle) {
379383
LOG("Right half matches.\n");
380384
i = cut - 1;
381-
while (i >= memory && needle[i] == haystack[j + i]) {
385+
while (i >= memory && needle[i] == window[i]) {
382386
i--;
383387
}
384388
if (i < memory) {
385-
LOG("Left half matches. Returning %d.\n", j);
386-
return j;
389+
LOG("Left half matches. Returning %d.\n",
390+
window - haystack);
391+
return window - haystack;
387392
}
388-
LOG("Left half does not match. Jump ahead by period %d.\n", period);
389-
j += period;
393+
LOG("Left half does not match. Jump ahead by period %d.\n",
394+
period);
395+
window += period;
390396
memory = len_needle - period;
391397
}
392398
else {
393-
LOG("Right half does not match. Jump ahead by %d.\n", i - cut + 1);
394-
j += i - cut + 1;
399+
LOG("Right half does not match. Jump ahead by %d.\n",
400+
i - cut + 1);
401+
window += i - cut + 1;
395402
memory = 0;
396403
}
397404
}
398405
}
399406
else {
400407
period = Py_MAX(cut, len_needle - cut) + 1;
401408
LOG("Needle is not periodic.\n");
402-
Py_ssize_t j = 0;
403409
assert(cut < len_needle);
404410
STRINGLIB_CHAR needle_cut = needle[cut];
405-
while (j <= len_haystack - len_needle) {
411+
while (window <= last_window) {
406412

407413
// Visualize the line-up:
408414
LOG("> "); LOG_STRING(haystack, len_haystack);
409-
LOG("\n> "); LOG("%*s", j, ""); LOG_STRING(needle, len_needle);
410-
LOG("\n> "); LOG("%*s", j + cut, ""); LOG(" ^ <-- cut\n");
415+
LOG("\n> "); LOG("%*s", window - haystack, "");
416+
LOG_STRING(needle, len_needle);
417+
LOG("\n> "); LOG("%*s", window - haystack + cut, "");
418+
LOG(" ^ <-- cut\n");
411419

412-
if (haystack[j + cut] != needle_cut) {
420+
if (window[cut] != needle_cut) {
413421
// Sunday's trick: if we're going to jump, we might
414422
// as well jump to line up the character *after* the
415423
// current window.
416-
STRINGLIB_CHAR first_outside = haystack[j + len_needle];
424+
STRINGLIB_CHAR first_outside = window[len_needle];
417425
SHIFT_TYPE shift = p->table[first_outside & TABLE_MASK];
418426
if (shift == NOT_FOUND) {
419-
LOG("\"%c\" not found. Skipping entirely.\n", first_outside);
420-
j += len_needle + 1;
427+
LOG("\"%c\" not found. Skipping entirely.\n",
428+
first_outside);
429+
window += len_needle + 1;
421430
}
422431
else {
423432
LOG("Shifting to line up \"%c\".\n", first_outside);
424-
j += shift;
433+
window += shift;
425434
}
426435
continue;
427436
}
428437

429438
Py_ssize_t i = cut + 1;
430-
while (i < len_needle && needle[i] == haystack[j + i]) {
439+
while (i < len_needle && needle[i] == window[i]) {
431440
i++;
432441
}
433442
if (i >= len_needle) {
434443
LOG("Right half matches.\n");
435444
i = cut - 1;
436-
while (i >= 0 && needle[i] == haystack[j + i]) {
445+
while (i >= 0 && needle[i] == window[i]) {
437446
i--;
438447
}
439448
if (i < 0){
440-
LOG("Left half matches. Returning %d.\n", j);
441-
return j;
449+
LOG("Left half matches. Returning %d.\n",
450+
window - haystack);
451+
return window - haystack;
442452
}
443-
LOG("Left half does not match. Advance by period %d.\n", period);
444-
j += period;
453+
LOG("Left half does not match. Advance by period %d.\n",
454+
period);
455+
window += period;
445456
}
446457
else {
447-
LOG("Right half does not match. Advance by %d.\n", i - cut + 1);
448-
j += i - cut + 1;
458+
LOG("Right half does not match. Advance by %d.\n",
459+
i - cut + 1);
460+
window += i - cut + 1;
449461
}
450462
}
451463
}
@@ -459,7 +471,7 @@ STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
459471
const STRINGLIB_CHAR *needle,
460472
Py_ssize_t len_needle)
461473
{
462-
LOG("##### Counting \"%s\" in \"%s\".\n", needle, haystack);
474+
LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
463475
Py_ssize_t index;
464476
index = STRINGLIB(find_char)(haystack,
465477
len_haystack - len_needle + 1,

0 commit comments

Comments
 (0)