Skip to content

Commit 48de2a7

Browse files
avargitster
authored andcommitted
grep: remove the kwset optimization
A later change will replace this optimization with optimistic use of PCRE v2. I'm completely removing it as an intermediate step, as opposed to replacing it with PCRE v2, to demonstrate that no grep semantics depend on this (or any other) optimization for the fixed backend anymore. For now this is mostly (but not entirely) a performance regression, as shown by this hacky one-liner: for opt in '' ' -i' do GIT_PERF_7821_GREP_OPTS=$opt GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 CFLAGS=-O3 USE_LIBPCRE=YesPlease' ./run origin/master HEAD -- p7821-grep-engines-fixed.sh done && for opt in '' ' -i' do GIT_PERF_4221_LOG_OPTS=$opt GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 CFLAGS=-O3 USE_LIBPCRE=YesPlease' ./run origin/master HEAD -- p4221-log-grep-engines-fixed.sh done Which produces: plain grep: Test origin/master HEAD ------------------------------------------------------------------------- 7821.1: fixed grep int 0.55(1.60+0.63) 0.82(3.11+0.51) +49.1% 7821.2: basic grep int 0.62(1.68+0.49) 0.85(3.02+0.52) +37.1% 7821.3: extended grep int 0.61(1.63+0.53) 0.91(3.09+0.44) +49.2% 7821.4: perl grep int 0.55(1.60+0.57) 0.41(0.93+0.57) -25.5% 7821.6: fixed grep uncommon 0.20(0.50+0.44) 0.35(1.27+0.42) +75.0% 7821.7: basic grep uncommon 0.20(0.49+0.45) 0.35(1.29+0.41) +75.0% 7821.8: extended grep uncommon 0.20(0.45+0.48) 0.35(1.25+0.44) +75.0% 7821.9: perl grep uncommon 0.20(0.53+0.41) 0.16(0.24+0.49) -20.0% 7821.11: fixed grep æ 0.35(1.27+0.40) 0.25(0.82+0.39) -28.6% 7821.12: basic grep æ 0.35(1.28+0.38) 0.25(0.75+0.44) -28.6% 7821.13: extended grep æ 0.36(1.21+0.46) 0.25(0.86+0.35) -30.6% 7821.14: perl grep æ 0.35(1.33+0.34) 0.16(0.26+0.47) -54.3% grep with -i: Test origin/master HEAD ----------------------------------------------------------------------------- 7821.1: fixed grep -i int 0.61(1.84+0.64) 1.11(4.12+0.64) +82.0% 7821.2: basic grep -i int 0.72(1.86+0.57) 1.15(4.48+0.49) +59.7% 7821.3: extended grep -i int 0.94(1.83+0.60) 1.53(4.12+0.58) +62.8% 7821.4: perl grep -i int 0.66(1.82+0.59) 0.55(1.08+0.58) -16.7% 7821.6: fixed grep -i uncommon 0.21(0.51+0.44) 0.44(1.74+0.34) +109.5% 7821.7: basic grep -i uncommon 0.21(0.55+0.41) 0.44(1.72+0.40) +109.5% 7821.8: extended grep -i uncommon 0.21(0.57+0.39) 0.42(1.64+0.45) +100.0% 7821.9: perl grep -i uncommon 0.21(0.48+0.48) 0.17(0.30+0.45) -19.0% 7821.11: fixed grep -i æ 0.25(0.73+0.45) 0.25(0.75+0.45) +0.0% 7821.12: basic grep -i æ 0.25(0.71+0.49) 0.26(0.77+0.44) +4.0% 7821.13: extended grep -i æ 0.25(0.75+0.44) 0.25(0.74+0.46) +0.0% 7821.14: perl grep -i æ 0.17(0.26+0.48) 0.16(0.20+0.52) -5.9% plain log: Test origin/master HEAD --------------------------------------------------------------------------------- 4221.1: fixed log --grep='int' 7.31(7.06+0.21) 8.11(7.85+0.20) +10.9% 4221.2: basic log --grep='int' 7.30(6.94+0.27) 8.16(7.89+0.19) +11.8% 4221.3: extended log --grep='int' 7.34(7.05+0.21) 8.08(7.76+0.25) +10.1% 4221.4: perl log --grep='int' 7.27(6.94+0.24) 7.05(6.76+0.25) -3.0% 4221.6: fixed log --grep='uncommon' 6.97(6.62+0.32) 7.86(7.51+0.30) +12.8% 4221.7: basic log --grep='uncommon' 7.05(6.69+0.29) 7.89(7.60+0.28) +11.9% 4221.8: extended log --grep='uncommon' 6.89(6.56+0.32) 7.99(7.66+0.24) +16.0% 4221.9: perl log --grep='uncommon' 7.02(6.66+0.33) 6.97(6.54+0.36) -0.7% 4221.11: fixed log --grep='æ' 7.37(7.03+0.33) 7.67(7.30+0.31) +4.1% 4221.12: basic log --grep='æ' 7.41(7.00+0.31) 7.60(7.28+0.26) +2.6% 4221.13: extended log --grep='æ' 7.35(6.96+0.38) 7.73(7.31+0.34) +5.2% 4221.14: perl log --grep='æ' 7.43(7.10+0.32) 6.95(6.61+0.27) -6.5% log with -i: Test origin/master HEAD ------------------------------------------------------------------------------------ 4221.1: fixed log -i --grep='int' 7.40(7.05+0.23) 8.66(8.38+0.20) +17.0% 4221.2: basic log -i --grep='int' 7.39(7.09+0.23) 8.67(8.39+0.20) +17.3% 4221.3: extended log -i --grep='int' 7.29(6.99+0.26) 8.69(8.31+0.26) +19.2% 4221.4: perl log -i --grep='int' 7.42(7.16+0.21) 7.14(6.80+0.24) -3.8% 4221.6: fixed log -i --grep='uncommon' 6.94(6.58+0.35) 8.43(8.04+0.30) +21.5% 4221.7: basic log -i --grep='uncommon' 6.95(6.62+0.31) 8.34(7.93+0.32) +20.0% 4221.8: extended log -i --grep='uncommon' 7.06(6.75+0.25) 8.32(7.98+0.31) +17.8% 4221.9: perl log -i --grep='uncommon' 6.96(6.69+0.26) 7.04(6.64+0.32) +1.1% 4221.11: fixed log -i --grep='æ' 7.92(7.55+0.33) 7.86(7.44+0.34) -0.8% 4221.12: basic log -i --grep='æ' 7.88(7.49+0.32) 7.84(7.46+0.34) -0.5% 4221.13: extended log -i --grep='æ' 7.91(7.51+0.32) 7.87(7.48+0.32) -0.5% 4221.14: perl log -i --grep='æ' 7.01(6.59+0.35) 6.99(6.64+0.28) -0.3% Some of those, as noted in [1] are because PCRE is faster at finding fixed strings. This looks bad for some engines, but in the next change we'll optimistically use PCRE v2 for all of these, so it'll look better. 1. https://public-inbox.org/git/[email protected]/ Signed-off-by: Ævar Arnfjörð Bjarmason <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 45d1f37 commit 48de2a7

File tree

2 files changed

+3
-62
lines changed

2 files changed

+3
-62
lines changed

grep.c

Lines changed: 3 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -356,18 +356,6 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
356356
die("%s'%s': %s", where, p->pattern, error);
357357
}
358358

359-
static int is_fixed(const char *s, size_t len)
360-
{
361-
size_t i;
362-
363-
for (i = 0; i < len; i++) {
364-
if (is_regex_special(s[i]))
365-
return 0;
366-
}
367-
368-
return 1;
369-
}
370-
371359
#ifdef USE_LIBPCRE1
372360
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
373361
{
@@ -643,38 +631,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
643631

644632
p->word_regexp = opt->word_regexp;
645633
p->ignore_case = opt->ignore_case;
634+
p->fixed = opt->fixed;
646635

647636
if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
648637
die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
649638

650-
/*
651-
* Even when -F (fixed) asks us to do a non-regexp search, we
652-
* may not be able to correctly case-fold when -i
653-
* (ignore-case) is asked (in which case, we'll synthesize a
654-
* regexp to match the pattern that matches regexp special
655-
* characters literally, while ignoring case differences). On
656-
* the other hand, even without -F, if the pattern does not
657-
* have any regexp special characters and there is no need for
658-
* case-folding search, we can internally turn it into a
659-
* simple string match using kws. p->fixed tells us if we
660-
* want to use kws.
661-
*/
662-
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
663-
p->fixed = !p->ignore_case || !has_non_ascii(p->pattern);
664-
665-
if (p->fixed) {
666-
p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
667-
kwsincr(p->kws, p->pattern, p->patternlen);
668-
kwsprep(p->kws);
669-
return;
670-
}
671-
672639
if (opt->fixed) {
673-
/*
674-
* We come here when the pattern has the non-ascii
675-
* characters we cannot case-fold, and asked to
676-
* ignore-case.
677-
*/
678640
compile_fixed_regexp(p, opt);
679641
return;
680642
}
@@ -1042,9 +1004,7 @@ void free_grep_patterns(struct grep_opt *opt)
10421004
case GREP_PATTERN: /* atom */
10431005
case GREP_PATTERN_HEAD:
10441006
case GREP_PATTERN_BODY:
1045-
if (p->kws)
1046-
kwsfree(p->kws);
1047-
else if (p->pcre1_regexp)
1007+
if (p->pcre1_regexp)
10481008
free_pcre1_regexp(p);
10491009
else if (p->pcre2_pattern)
10501010
free_pcre2_pattern(p);
@@ -1104,29 +1064,12 @@ static void show_name(struct grep_opt *opt, const char *name)
11041064
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
11051065
}
11061066

1107-
static int fixmatch(struct grep_pat *p, char *line, char *eol,
1108-
regmatch_t *match)
1109-
{
1110-
struct kwsmatch kwsm;
1111-
size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1112-
if (offset == -1) {
1113-
match->rm_so = match->rm_eo = -1;
1114-
return REG_NOMATCH;
1115-
} else {
1116-
match->rm_so = offset;
1117-
match->rm_eo = match->rm_so + kwsm.size[0];
1118-
return 0;
1119-
}
1120-
}
1121-
11221067
static int patmatch(struct grep_pat *p, char *line, char *eol,
11231068
regmatch_t *match, int eflags)
11241069
{
11251070
int hit;
11261071

1127-
if (p->fixed)
1128-
hit = !fixmatch(p, line, eol, match);
1129-
else if (p->pcre1_regexp)
1072+
if (p->pcre1_regexp)
11301073
hit = !pcre1match(p, line, eol, match, eflags);
11311074
else if (p->pcre2_pattern)
11321075
hit = !pcre2match(p, line, eol, match, eflags);

grep.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ typedef int pcre2_compile_context;
3232
typedef int pcre2_match_context;
3333
typedef int pcre2_jit_stack;
3434
#endif
35-
#include "kwset.h"
3635
#include "thread-utils.h"
3736
#include "userdiff.h"
3837

@@ -97,7 +96,6 @@ struct grep_pat {
9796
pcre2_match_context *pcre2_match_context;
9897
pcre2_jit_stack *pcre2_jit_stack;
9998
uint32_t pcre2_jit_on;
100-
kwset_t kws;
10199
unsigned fixed:1;
102100
unsigned ignore_case:1;
103101
unsigned word_regexp:1;

0 commit comments

Comments
 (0)