Skip to content

Commit b28632d

Browse files
committed
write stat
1 parent 1d7022f commit b28632d

File tree

6 files changed

+288
-38
lines changed

6 files changed

+288
-38
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
authorized_key.json
2+
stat.yaml

tools/greenplum-to-pg-tests/cmd/extraxtSessions.go

Lines changed: 254 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,33 @@ import (
1313
"math"
1414
"os"
1515
"regexp"
16+
"slices"
1617
"strings"
1718
"time"
1819

1920
"github.com/spf13/cobra"
2021
"github.com/ydb-platform/ydb-go-sdk/v3"
2122
"github.com/ydb-platform/ydb-go-sdk/v3/query"
23+
"gopkg.in/yaml.v3"
2224

2325
"github.com/ydb-platform/postgres-compatibility-tests/tools/greenplum-to-pg-tests/internal"
2426
)
2527

2628
// extractSessionsConfig stores the command-line settings of the extract
// sessions command. The fields are bound to persistent cobra flags in init.
var extractSessionsConfig struct {
	// NOTE(review): the flag bound to this field is outside the visible diff;
	// presumably the path of the PostgreSQL schema dump — confirm.
	schemeDumpFile string

	// NOTE(review): the flag bound to this field is outside the visible diff;
	// presumably the path of the sessions log to parse — confirm.
	sessionsLog string

	// includeFailed (--include-failed) extracts sessions with failed transactions.
	includeFailed bool

	// ydbConnectionString (--ydb-connection) is the YDB server used to check queries.
	ydbConnectionString string

	// limitRequests (--requests-limit) limits the number of parsed requests; 0 means unlimited.
	limitRequests int

	// rulesFile (--rules-file) is the file with issue detection rules; empty skips reading rules.
	rulesFile string

	// printKnownIssues (--print-known-issues) reports known issues instead of unknown ones.
	printKnownIssues bool

	// printQueryForKnownIssue (--print-query-for-known-issues) also prints the query text of a known issue.
	printQueryForKnownIssue bool

	// filterReason (--reason-filter) is a regexp; queries whose reason does not match are skipped.
	filterReason string

	// errorLimit (--print-errors-limit) stops the check after that many reported issues; 0 means no limit.
	errorLimit int

	// printErrorsInProgress (--print-progress) logs each issue while the check is running.
	printErrorsInProgress bool

	// printStats (--print-stats) prints the aggregated statistics after the check.
	printStats bool

	// printProgressEveryQueries (--print-progress-every-queries) is the progress logging period, in queries.
	printProgressEveryQueries int

	// writeStatPath (--write-stat-file) is the path of the full statistics file; empty disables writing.
	writeStatPath string
}
3644

3745
func init() {
@@ -43,10 +51,16 @@ func init() {
4351

4452
extractSessionsCmd.PersistentFlags().BoolVar(&extractSessionsConfig.includeFailed, "include-failed", false, "Extract sessions with failed transactions")
4553
extractSessionsCmd.PersistentFlags().StringVar(&extractSessionsConfig.ydbConnectionString, "ydb-connection", "grpc://localhost:2136/local", "Connection string to ydb server for check queries")
46-
extractSessionsCmd.PersistentFlags().IntVar(&extractSessionsConfig.limitRequests, "requests-limit", 1000, "Limit number of parse requests, 0 mean unlimited")
54+
extractSessionsCmd.PersistentFlags().IntVar(&extractSessionsConfig.limitRequests, "requests-limit", 100, "Limit number of parse requests, 0 mean unlimited")
4755
extractSessionsCmd.PersistentFlags().StringVar(&extractSessionsConfig.rulesFile, "rules-file", "issues.yaml", "Rules for detect issue. Set empty for skip read rules.")
4856
extractSessionsCmd.PersistentFlags().BoolVar(&extractSessionsConfig.printKnownIssues, "print-known-issues", false, "Print known issues instead of unknown")
57+
extractSessionsCmd.PersistentFlags().BoolVar(&extractSessionsConfig.printQueryForKnownIssue, "print-query-for-known-issues", true, "Print query for known issues")
4958
extractSessionsCmd.PersistentFlags().IntVar(&extractSessionsConfig.errorLimit, "print-errors-limit", 0, "Limit of printed errors. 0 mean infinite")
59+
extractSessionsCmd.PersistentFlags().StringVar(&extractSessionsConfig.filterReason, "reason-filter", "", "Filter printer queries and reasons by regexp")
60+
extractSessionsCmd.PersistentFlags().BoolVar(&extractSessionsConfig.printErrorsInProgress, "print-progress", false, "Print queries in progress")
61+
extractSessionsCmd.PersistentFlags().BoolVar(&extractSessionsConfig.printStats, "print-stats", true, "Print queries in progress")
62+
extractSessionsCmd.PersistentFlags().IntVar(&extractSessionsConfig.printProgressEveryQueries, "print-progress-every-queries", 10, "Periodically print progress")
63+
extractSessionsCmd.PersistentFlags().StringVar(&extractSessionsConfig.writeStatPath, "write-stat-file", "", "Path to write full stat file if need. Will write example of queries")
5064
}
5165

5266
// extractSessionsCmd represents the extraxtSessions command
@@ -89,7 +103,10 @@ var extractSessionsCmd = &cobra.Command{
89103
cancel()
90104

91105
sessions := readSessions()
106+
107+
log.Println("Start check queries")
92108
checkQueries(rules, schema, db, sessions)
109+
93110
},
94111
}
95112

@@ -108,7 +125,7 @@ func readSessions() []internal.Session {
108125
log.Println("Start reading file...")
109126
readLoop:
110127
for {
111-
if limitCount > 0 && counter > limitCount {
128+
if limitCount > 0 && counter >= limitCount {
112129
log.Println("Reached limit for parse request count:", limitCount)
113130
break
114131
}
@@ -188,37 +205,71 @@ readLoop:
188205
}
189206

190207
func checkQueries(rules Rules, pgSchema *internal.PgSchema, db *ydb.Driver, sessions []internal.Session) {
208+
reasonFilter := regexp.MustCompile(extractSessionsConfig.filterReason)
191209
checked := map[string]bool{}
192210

193-
limit := extractSessionsConfig.errorLimit
194-
if limit == 0 {
195-
limit = math.MaxInt
211+
errorLimit := extractSessionsConfig.errorLimit
212+
if errorLimit == 0 {
213+
errorLimit = math.MaxInt
214+
}
215+
216+
totalQueries := 0
217+
for _, session := range sessions {
218+
for _, transaction := range session.Transactions {
219+
totalQueries += len(transaction.Queries)
220+
}
196221
}
197222

223+
queryIndex := 0
224+
225+
var stats SessionStats
198226
for _, session := range sessions {
199227
for _, transaction := range session.Transactions {
200228
for _, pgQuery := range transaction.Queries {
229+
queryIndex++
230+
if queryIndex%extractSessionsConfig.printProgressEveryQueries == 0 {
231+
log.Printf("Checking query %8d/%v", queryIndex, totalQueries)
232+
}
201233
if checked[pgQuery.Text] {
202234
continue
203235
}
204236
checked[pgQuery.Text] = true
205237

206-
reason, checkResult := checkQuery(rules, db, pgQuery.Text)
238+
reason, checkResult := checkQuery(&stats, rules, db, pgQuery.Text)
239+
if !reasonFilter.MatchString(reason) {
240+
continue
241+
}
207242
if !extractSessionsConfig.printKnownIssues && checkResult == checkResultErrUnknown {
208-
log.Printf("Reason: %v\nQuery:%v\n\n", reason, pgQuery.Text)
209-
limit--
243+
if extractSessionsConfig.printErrorsInProgress {
244+
log.Printf("Reason: %v\nQuery:%v\n\n", reason, pgQuery.Text)
245+
}
246+
errorLimit--
210247
}
211248
if extractSessionsConfig.printKnownIssues && checkResult == checkResultErrKnown {
212-
log.Printf("Reason: %v", reason)
213-
limit--
249+
if extractSessionsConfig.printErrorsInProgress {
250+
log.Printf("Reason: %v", reason)
251+
if extractSessionsConfig.printQueryForKnownIssue {
252+
log.Printf("Query:\n%v\n\n", pgQuery.Text)
253+
}
254+
}
255+
errorLimit--
214256
}
215-
if limit == 0 {
216-
log.Println("Print error limit reached:", extractSessionsConfig.errorLimit)
257+
if errorLimit == 0 {
258+
log.Println("Error limit reached:", extractSessionsConfig.errorLimit)
217259
return
218260
}
219261
}
220262
}
221263
}
264+
265+
if extractSessionsConfig.printStats {
266+
stats.PrintStats()
267+
}
268+
if extractSessionsConfig.writeStatPath != "" {
269+
if err := stats.SaveToFile(extractSessionsConfig.writeStatPath); err != nil {
270+
log.Printf("Failed to write stat: %+v", err)
271+
}
272+
}
222273
}
223274

224275
type checkResultType int
@@ -229,7 +280,7 @@ const (
229280
checkResultErrUnknown
230281
)
231282

232-
func checkQuery(rules Rules, db *ydb.Driver, queryText string) (reason string, checkResult checkResultType) {
283+
func checkQuery(stat *SessionStats, rules Rules, db *ydb.Driver, queryText string) (reason string, checkResult checkResultType) {
233284
queryText = strings.TrimSpace(queryText)
234285
queryText = fixSchemaNames(queryText)
235286
queryText = fixCreateTable(queryText)
@@ -246,6 +297,7 @@ func checkQuery(rules Rules, db *ydb.Driver, queryText string) (reason string, c
246297
}
247298

248299
if err == nil {
300+
stat.CountOK()
249301
return "", checkResultOK
250302
}
251303

@@ -255,11 +307,13 @@ func checkQuery(rules Rules, db *ydb.Driver, queryText string) (reason string, c
255307
issues := internal.ExtractIssues(err)
256308

257309
if reason = rules.FindKnownIssue(queryText, issues); reason != "" {
310+
stat.CountKnown(reason, queryText)
258311
return reason, checkResultErrKnown
259312
}
260313

261314
reason = fmt.Sprintf("%v (%v): %#v", ydbErr.Name(), ydbErr.Code(), issues)
262315

316+
stat.CountUnknown(issues, queryText)
263317
return reason, checkResultErrUnknown
264318
}
265319

@@ -285,3 +339,182 @@ func fixCreateTable(queryText string) string {
285339
queryText = createTableRegexp.ReplaceAllString(queryText, "$1 __stub_primary_key SERIAL PRIMARY KEY,")
286340
return queryText
287341
}
342+
343+
type SessionStats struct {
344+
OkCount int
345+
TotalCount int
346+
347+
MatchToRules map[string]*CounterWithExample[string] // [rule name] query example
348+
UnknownProblems map[internal.YdbIssue]*CounterWithExample[internal.YdbIssue]
349+
}
350+
351+
func (s *SessionStats) CountOK() {
352+
s.OkCount++
353+
}
354+
355+
func (s *SessionStats) CountKnown(ruleName string, query string) {
356+
s.TotalCount++
357+
if s.MatchToRules == nil {
358+
s.MatchToRules = make(map[string]*CounterWithExample[string])
359+
}
360+
361+
var stat *CounterWithExample[string]
362+
var ok bool
363+
if stat, ok = s.MatchToRules[ruleName]; !ok {
364+
stat = &CounterWithExample[string]{
365+
ID: ruleName,
366+
Example: query,
367+
}
368+
s.MatchToRules[ruleName] = stat
369+
}
370+
371+
stat.Count++
372+
}
373+
374+
func (s *SessionStats) CountUnknown(issues []internal.YdbIssue, query string) {
375+
s.TotalCount++
376+
if s.UnknownProblems == nil {
377+
s.UnknownProblems = make(map[internal.YdbIssue]*CounterWithExample[internal.YdbIssue])
378+
}
379+
380+
for _, issue := range issues {
381+
var stat *CounterWithExample[internal.YdbIssue]
382+
var ok bool
383+
if stat, ok = s.UnknownProblems[issue]; !ok {
384+
stat = &CounterWithExample[internal.YdbIssue]{
385+
ID: issue,
386+
Example: query,
387+
}
388+
s.UnknownProblems[issue] = stat
389+
}
390+
stat.Count++
391+
}
392+
}
393+
394+
func (s *SessionStats) GetTopKnown(count int) []CounterWithExample[string] {
395+
return getTopCounter(s.MatchToRules, count)
396+
}
397+
398+
func (s *SessionStats) GetTopUnknown(count int) []CounterWithExample[internal.YdbIssue] {
399+
return getTopCounter(s.UnknownProblems, count)
400+
}
401+
402+
func (s *SessionStats) PrintStats() {
403+
fmt.Println("Queries stat.")
404+
fmt.Println("Ok Count:", s.OkCount)
405+
fmt.Println()
406+
fmt.Println("Known issues")
407+
SessionStats_printExampleCounter(getTopCounter(s.MatchToRules, 10))
408+
409+
fmt.Println("New issues")
410+
SessionStats_printExampleCounter(getTopCounter(s.UnknownProblems, 10))
411+
}
412+
413+
func SessionStats_printExampleCounter[K comparable](examples []CounterWithExample[K]) {
414+
for _, example := range examples {
415+
fmt.Printf(`
416+
Problem: %v
417+
Count: %v
418+
Example: %v
419+
420+
`, example.ID, example.Count, example.Example)
421+
}
422+
}
423+
424+
// CounterWithExample counts how often the problem identified by ID occurred
// and keeps one query that triggered it. The yaml tags define the field names
// used when the value is encoded into the statistics file.
type CounterWithExample[K comparable] struct {
	// ID identifies the counted problem (a rule name or an issue).
	ID K `yaml:"id"`

	// Count is the number of recorded occurrences.
	Count int `yaml:"count"`

	// Example is a query text that produced the problem.
	Example string `yaml:"example"`
}
429+
430+
func getTopCounter[K comparable](m map[K]*CounterWithExample[K], count int) []CounterWithExample[K] {
431+
res := make([]CounterWithExample[K], 0, len(m))
432+
for _, stat := range m {
433+
res = append(res, *stat)
434+
}
435+
436+
// Max counts
437+
slices.SortFunc(res, func(a, b CounterWithExample[K]) int {
438+
return b.Count - a.Count
439+
})
440+
441+
if count >= len(res) {
442+
return res
443+
}
444+
445+
return res[:count]
446+
}
447+
448+
func (s *SessionStats) SaveToFile(path string) error {
449+
var statFile struct {
450+
TotalCount int `yaml:"total_count"`
451+
OkCount int `yaml:"ok_count"`
452+
OkPercent float64 `yaml:"ok_percent"`
453+
UnknownIssues []CounterWithExample[internal.YdbIssue] `yaml:"unknown_issues"`
454+
KnownIssues []CounterWithExample[string] `yaml:"known_issues"`
455+
}
456+
457+
statFile.TotalCount = s.TotalCount
458+
statFile.OkCount = s.OkCount
459+
statFile.OkPercent = float64(s.OkCount) / float64(s.TotalCount) * 100
460+
statFile.UnknownIssues = s.GetTopUnknown(math.MaxInt)
461+
statFile.KnownIssues = s.GetTopKnown(math.MaxInt)
462+
463+
for i := range statFile.UnknownIssues {
464+
statFile.UnknownIssues[i].Example = cleanStringForLiteralYaml(statFile.UnknownIssues[i].Example)
465+
}
466+
for i := range statFile.KnownIssues {
467+
statFile.KnownIssues[i].Example = cleanStringForLiteralYaml(statFile.KnownIssues[i].Example)
468+
}
469+
470+
f, err := os.Create(path)
471+
if err != nil {
472+
return fmt.Errorf("failed to create file for write stat: %w", err)
473+
}
474+
defer f.Close()
475+
encoder := yaml.NewEncoder(f)
476+
if err = encoder.Encode(&statFile); err != nil {
477+
return fmt.Errorf("failed to write stat: %w", err)
478+
}
479+
return nil
480+
}
481+
482+
func cleanStringForLiteralYaml(s string) string {
483+
lines := strings.Split(s, "\n")
484+
for i, line := range lines {
485+
// trim ending space
486+
for strings.HasSuffix(line, " ") {
487+
line = strings.TrimSuffix(line, " ")
488+
}
489+
lines[i] = line
490+
}
491+
492+
s = strings.Join(lines, "\n")
493+
494+
sBytes := []byte(s)
495+
buf := &strings.Builder{}
496+
497+
// range over runes
498+
for i, r := range s {
499+
if isYamlPrintable(sBytes, i) {
500+
buf.WriteRune(r)
501+
} else {
502+
buf.WriteByte('X')
503+
}
504+
}
505+
506+
return buf.String()
507+
}
508+
509+
// isYamlPrintable reports whether the character starting at b[i] is printable
// in the sense of the yaml package's is_printable check, of which this is a
// bounds-checked copy.
//
// Only the leading bytes of a UTF-8 sequence are inspected. Unlike the
// original check, an offset outside b or a multi-byte lead whose continuation
// bytes are missing yields false instead of an index-out-of-range panic, so
// callers can pass text that ends in a truncated sequence.
func isYamlPrintable(b []byte, i int) bool {
	if i < 0 || i >= len(b) {
		return false
	}

	c := b[i]
	switch {
	case c == 0x0A: // . == #x0A
		return true
	case c >= 0x20 && c <= 0x7E: // #x20 <= . <= #x7E
		return true
	case c == 0xC2: // #xA0 <= . (first two-byte lead)
		return i+1 < len(b) && b[i+1] >= 0xA0
	case c > 0xC2 && c < 0xED:
		return true
	case c == 0xED: // . <= #xD7FF
		return i+1 < len(b) && b[i+1] < 0xA0
	case c == 0xEE:
		return true
	case c == 0xEF: // #xE000 <= . <= #xFFFD
		if i+2 >= len(b) {
			return false
		}
		return !(b[i+1] == 0xBB && b[i+2] == 0xBF) && // && . != #xFEFF
			!(b[i+1] == 0xBF && (b[i+2] == 0xBE || b[i+2] == 0xBF))
	}
	return false
}

tools/greenplum-to-pg-tests/cmd/issue_rules.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ import (
1212
)
1313

1414
type Rules struct {
15-
Issues []PgIssueRules
15+
StatTotalCount int `yaml:"stat_total_count"`
16+
Issues []PgIssueRules
1617
}
1718

1819
func (r *Rules) LoadFromFile(path string) error {
@@ -70,6 +71,7 @@ type PgIssueRules struct {
7071
QueryRegexp OneOrSliceString `yaml:"query_regexp"`
7172
Example string `yaml:"example"`
7273
Comment string `yaml:"comment"`
74+
Count int `yaml:"count"`
7375

7476
issuesRegexpCompiled []*regexp.Regexp
7577
queryRegexpCompiled []*regexp.Regexp

0 commit comments

Comments
 (0)