@@ -13,25 +13,33 @@ import (
13
13
"math"
14
14
"os"
15
15
"regexp"
16
+ "slices"
16
17
"strings"
17
18
"time"
18
19
19
20
"github.com/spf13/cobra"
20
21
"github.com/ydb-platform/ydb-go-sdk/v3"
21
22
"github.com/ydb-platform/ydb-go-sdk/v3/query"
23
+ "gopkg.in/yaml.v3"
22
24
23
25
"github.com/ydb-platform/postgres-compatibility-tests/tools/greenplum-to-pg-tests/internal"
24
26
)
25
27
26
28
// extractSessionsConfig stores the settings of the extract-sessions command.
// The fields are filled from command-line flags registered in init.
var extractSessionsConfig struct {
	// Input data.
	// NOTE(review): the flags for the two paths below are registered outside
	// the visible part of init — presumably a schema dump and a sessions log.
	schemeDumpFile string
	sessionsLog    string
	includeFailed  bool // --include-failed
	limitRequests  int  // --requests-limit, 0 means unlimited

	// Query checking.
	ydbConnectionString string // --ydb-connection
	rulesFile           string // --rules-file, empty skips reading rules

	// Reporting.
	printKnownIssues          bool   // --print-known-issues
	printQueryForKnownIssue   bool   // --print-query-for-known-issues
	filterReason              string // --reason-filter, regexp
	errorLimit                int    // --print-errors-limit, 0 means infinite
	printErrorsInProgress     bool   // --print-progress
	printStats                bool   // --print-stats
	printProgressEveryQueries int    // --print-progress-every-queries
	writeStatPath             string // --write-stat-file
}
36
44
37
45
func init () {
@@ -43,10 +51,16 @@ func init() {
43
51
44
52
extractSessionsCmd .PersistentFlags ().BoolVar (& extractSessionsConfig .includeFailed , "include-failed" , false , "Extract sessions with failed transactions" )
45
53
extractSessionsCmd .PersistentFlags ().StringVar (& extractSessionsConfig .ydbConnectionString , "ydb-connection" , "grpc://localhost:2136/local" , "Connection string to ydb server for check queries" )
46
- extractSessionsCmd .PersistentFlags ().IntVar (& extractSessionsConfig .limitRequests , "requests-limit" , 1000 , "Limit number of parse requests, 0 mean unlimited" )
54
+ extractSessionsCmd .PersistentFlags ().IntVar (& extractSessionsConfig .limitRequests , "requests-limit" , 100 , "Limit number of parse requests, 0 mean unlimited" )
47
55
extractSessionsCmd .PersistentFlags ().StringVar (& extractSessionsConfig .rulesFile , "rules-file" , "issues.yaml" , "Rules for detect issue. Set empty for skip read rules." )
48
56
extractSessionsCmd .PersistentFlags ().BoolVar (& extractSessionsConfig .printKnownIssues , "print-known-issues" , false , "Print known issues instead of unknown" )
57
+ extractSessionsCmd .PersistentFlags ().BoolVar (& extractSessionsConfig .printQueryForKnownIssue , "print-query-for-known-issues" , true , "Print query for known issues" )
49
58
extractSessionsCmd .PersistentFlags ().IntVar (& extractSessionsConfig .errorLimit , "print-errors-limit" , 0 , "Limit of printed errors. 0 mean infinite" )
59
+ extractSessionsCmd .PersistentFlags ().StringVar (& extractSessionsConfig .filterReason , "reason-filter" , "" , "Filter printer queries and reasons by regexp" )
60
+ extractSessionsCmd .PersistentFlags ().BoolVar (& extractSessionsConfig .printErrorsInProgress , "print-progress" , false , "Print queries in progress" )
61
+ extractSessionsCmd .PersistentFlags ().BoolVar (& extractSessionsConfig .printStats , "print-stats" , true , "Print queries in progress" )
62
+ extractSessionsCmd .PersistentFlags ().IntVar (& extractSessionsConfig .printProgressEveryQueries , "print-progress-every-queries" , 10 , "Periodically print progress" )
63
+ extractSessionsCmd .PersistentFlags ().StringVar (& extractSessionsConfig .writeStatPath , "write-stat-file" , "" , "Path to write full stat file if need. Will write example of queries" )
50
64
}
51
65
52
66
// extractSessionsCmd represents the extractSessions command
@@ -89,7 +103,10 @@ var extractSessionsCmd = &cobra.Command{
89
103
cancel ()
90
104
91
105
sessions := readSessions ()
106
+
107
+ log .Println ("Start check queries" )
92
108
checkQueries (rules , schema , db , sessions )
109
+
93
110
},
94
111
}
95
112
@@ -108,7 +125,7 @@ func readSessions() []internal.Session {
108
125
log .Println ("Start reading file..." )
109
126
readLoop:
110
127
for {
111
- if limitCount > 0 && counter > limitCount {
128
+ if limitCount > 0 && counter >= limitCount {
112
129
log .Println ("Reached limit for parse request count:" , limitCount )
113
130
break
114
131
}
@@ -188,37 +205,71 @@ readLoop:
188
205
}
189
206
190
207
func checkQueries (rules Rules , pgSchema * internal.PgSchema , db * ydb.Driver , sessions []internal.Session ) {
208
+ reasonFilter := regexp .MustCompile (extractSessionsConfig .filterReason )
191
209
checked := map [string ]bool {}
192
210
193
- limit := extractSessionsConfig .errorLimit
194
- if limit == 0 {
195
- limit = math .MaxInt
211
+ errorLimit := extractSessionsConfig .errorLimit
212
+ if errorLimit == 0 {
213
+ errorLimit = math .MaxInt
214
+ }
215
+
216
+ totalQueries := 0
217
+ for _ , session := range sessions {
218
+ for _ , transaction := range session .Transactions {
219
+ totalQueries += len (transaction .Queries )
220
+ }
196
221
}
197
222
223
+ queryIndex := 0
224
+
225
+ var stats SessionStats
198
226
for _ , session := range sessions {
199
227
for _ , transaction := range session .Transactions {
200
228
for _ , pgQuery := range transaction .Queries {
229
+ queryIndex ++
230
+ if queryIndex % extractSessionsConfig .printProgressEveryQueries == 0 {
231
+ log .Printf ("Checking query %8d/%v" , queryIndex , totalQueries )
232
+ }
201
233
if checked [pgQuery .Text ] {
202
234
continue
203
235
}
204
236
checked [pgQuery .Text ] = true
205
237
206
- reason , checkResult := checkQuery (rules , db , pgQuery .Text )
238
+ reason , checkResult := checkQuery (& stats , rules , db , pgQuery .Text )
239
+ if ! reasonFilter .MatchString (reason ) {
240
+ continue
241
+ }
207
242
if ! extractSessionsConfig .printKnownIssues && checkResult == checkResultErrUnknown {
208
- log .Printf ("Reason: %v\n Query:%v\n \n " , reason , pgQuery .Text )
209
- limit --
243
+ if extractSessionsConfig .printErrorsInProgress {
244
+ log .Printf ("Reason: %v\n Query:%v\n \n " , reason , pgQuery .Text )
245
+ }
246
+ errorLimit --
210
247
}
211
248
if extractSessionsConfig .printKnownIssues && checkResult == checkResultErrKnown {
212
- log .Printf ("Reason: %v" , reason )
213
- limit --
249
+ if extractSessionsConfig .printErrorsInProgress {
250
+ log .Printf ("Reason: %v" , reason )
251
+ if extractSessionsConfig .printQueryForKnownIssue {
252
+ log .Printf ("Query:\n %v\n \n " , pgQuery .Text )
253
+ }
254
+ }
255
+ errorLimit --
214
256
}
215
- if limit == 0 {
216
- log .Println ("Print error limit reached:" , extractSessionsConfig .errorLimit )
257
+ if errorLimit == 0 {
258
+ log .Println ("Error limit reached:" , extractSessionsConfig .errorLimit )
217
259
return
218
260
}
219
261
}
220
262
}
221
263
}
264
+
265
+ if extractSessionsConfig .printStats {
266
+ stats .PrintStats ()
267
+ }
268
+ if extractSessionsConfig .writeStatPath != "" {
269
+ if err := stats .SaveToFile (extractSessionsConfig .writeStatPath ); err != nil {
270
+ log .Printf ("Failed to write stat: %+v" , err )
271
+ }
272
+ }
222
273
}
223
274
224
275
type checkResultType int
@@ -229,7 +280,7 @@ const (
229
280
checkResultErrUnknown
230
281
)
231
282
232
- func checkQuery (rules Rules , db * ydb.Driver , queryText string ) (reason string , checkResult checkResultType ) {
283
+ func checkQuery (stat * SessionStats , rules Rules , db * ydb.Driver , queryText string ) (reason string , checkResult checkResultType ) {
233
284
queryText = strings .TrimSpace (queryText )
234
285
queryText = fixSchemaNames (queryText )
235
286
queryText = fixCreateTable (queryText )
@@ -246,6 +297,7 @@ func checkQuery(rules Rules, db *ydb.Driver, queryText string) (reason string, c
246
297
}
247
298
248
299
if err == nil {
300
+ stat .CountOK ()
249
301
return "" , checkResultOK
250
302
}
251
303
@@ -255,11 +307,13 @@ func checkQuery(rules Rules, db *ydb.Driver, queryText string) (reason string, c
255
307
issues := internal .ExtractIssues (err )
256
308
257
309
if reason = rules .FindKnownIssue (queryText , issues ); reason != "" {
310
+ stat .CountKnown (reason , queryText )
258
311
return reason , checkResultErrKnown
259
312
}
260
313
261
314
reason = fmt .Sprintf ("%v (%v): %#v" , ydbErr .Name (), ydbErr .Code (), issues )
262
315
316
+ stat .CountUnknown (issues , queryText )
263
317
return reason , checkResultErrUnknown
264
318
}
265
319
@@ -285,3 +339,182 @@ func fixCreateTable(queryText string) string {
285
339
queryText = createTableRegexp .ReplaceAllString (queryText , "$1 __stub_primary_key SERIAL PRIMARY KEY," )
286
340
return queryText
287
341
}
342
+
343
+ type SessionStats struct {
344
+ OkCount int
345
+ TotalCount int
346
+
347
+ MatchToRules map [string ]* CounterWithExample [string ] // [rule name] query example
348
+ UnknownProblems map [internal.YdbIssue ]* CounterWithExample [internal.YdbIssue ]
349
+ }
350
+
351
+ func (s * SessionStats ) CountOK () {
352
+ s .OkCount ++
353
+ }
354
+
355
+ func (s * SessionStats ) CountKnown (ruleName string , query string ) {
356
+ s .TotalCount ++
357
+ if s .MatchToRules == nil {
358
+ s .MatchToRules = make (map [string ]* CounterWithExample [string ])
359
+ }
360
+
361
+ var stat * CounterWithExample [string ]
362
+ var ok bool
363
+ if stat , ok = s .MatchToRules [ruleName ]; ! ok {
364
+ stat = & CounterWithExample [string ]{
365
+ ID : ruleName ,
366
+ Example : query ,
367
+ }
368
+ s .MatchToRules [ruleName ] = stat
369
+ }
370
+
371
+ stat .Count ++
372
+ }
373
+
374
+ func (s * SessionStats ) CountUnknown (issues []internal.YdbIssue , query string ) {
375
+ s .TotalCount ++
376
+ if s .UnknownProblems == nil {
377
+ s .UnknownProblems = make (map [internal.YdbIssue ]* CounterWithExample [internal.YdbIssue ])
378
+ }
379
+
380
+ for _ , issue := range issues {
381
+ var stat * CounterWithExample [internal.YdbIssue ]
382
+ var ok bool
383
+ if stat , ok = s .UnknownProblems [issue ]; ! ok {
384
+ stat = & CounterWithExample [internal.YdbIssue ]{
385
+ ID : issue ,
386
+ Example : query ,
387
+ }
388
+ s .UnknownProblems [issue ] = stat
389
+ }
390
+ stat .Count ++
391
+ }
392
+ }
393
+
394
+ func (s * SessionStats ) GetTopKnown (count int ) []CounterWithExample [string ] {
395
+ return getTopCounter (s .MatchToRules , count )
396
+ }
397
+
398
+ func (s * SessionStats ) GetTopUnknown (count int ) []CounterWithExample [internal.YdbIssue ] {
399
+ return getTopCounter (s .UnknownProblems , count )
400
+ }
401
+
402
+ func (s * SessionStats ) PrintStats () {
403
+ fmt .Println ("Queries stat." )
404
+ fmt .Println ("Ok Count:" , s .OkCount )
405
+ fmt .Println ()
406
+ fmt .Println ("Known issues" )
407
+ SessionStats_printExampleCounter (getTopCounter (s .MatchToRules , 10 ))
408
+
409
+ fmt .Println ("New issues" )
410
+ SessionStats_printExampleCounter (getTopCounter (s .UnknownProblems , 10 ))
411
+ }
412
+
413
+ func SessionStats_printExampleCounter [K comparable ](examples []CounterWithExample [K ]) {
414
+ for _ , example := range examples {
415
+ fmt .Printf (`
416
+ Problem: %v
417
+ Count: %v
418
+ Example: %v
419
+
420
+ ` , example .ID , example .Count , example .Example )
421
+ }
422
+ }
423
+
424
// CounterWithExample counts how many times the problem identified by ID was
// seen and keeps one example query that triggered it.
type CounterWithExample[K comparable] struct {
	ID      K      `yaml:"id"`
	Count   int    `yaml:"count"`
	Example string `yaml:"example"`
}

// getTopCounter returns copies of the counters stored in m, ordered by
// descending Count, truncated to at most count elements.
//
// Counters with equal Count are ordered by the textual form of their ID, so
// the result does not depend on map iteration order. A count that is zero or
// negative yields an empty slice; a count of at least len(m) yields all
// counters. A nil or empty map yields an empty slice.
func getTopCounter[K comparable](m map[K]*CounterWithExample[K], count int) []CounterWithExample[K] {
	res := make([]CounterWithExample[K], 0, len(m))
	for _, stat := range m {
		res = append(res, *stat)
	}

	slices.SortFunc(res, func(a, b CounterWithExample[K]) int {
		// Explicit comparisons instead of b.Count-a.Count: no overflow.
		switch {
		case a.Count > b.Count:
			return -1
		case a.Count < b.Count:
			return 1
		}
		// K is only comparable, not ordered, so ties are broken on the
		// formatted ID to keep the output stable between runs.
		return strings.Compare(fmt.Sprint(a.ID), fmt.Sprint(b.ID))
	})

	// A negative bound would make the slice expression below panic.
	if count < 0 {
		count = 0
	}
	if count >= len(res) {
		return res
	}

	return res[:count]
}
447
+
448
+ func (s * SessionStats ) SaveToFile (path string ) error {
449
+ var statFile struct {
450
+ TotalCount int `yaml:"total_count"`
451
+ OkCount int `yaml:"ok_count"`
452
+ OkPercent float64 `yaml:"ok_percent"`
453
+ UnknownIssues []CounterWithExample [internal.YdbIssue ] `yaml:"unknown_issues"`
454
+ KnownIssues []CounterWithExample [string ] `yaml:"known_issues"`
455
+ }
456
+
457
+ statFile .TotalCount = s .TotalCount
458
+ statFile .OkCount = s .OkCount
459
+ statFile .OkPercent = float64 (s .OkCount ) / float64 (s .TotalCount ) * 100
460
+ statFile .UnknownIssues = s .GetTopUnknown (math .MaxInt )
461
+ statFile .KnownIssues = s .GetTopKnown (math .MaxInt )
462
+
463
+ for i := range statFile .UnknownIssues {
464
+ statFile .UnknownIssues [i ].Example = cleanStringForLiteralYaml (statFile .UnknownIssues [i ].Example )
465
+ }
466
+ for i := range statFile .KnownIssues {
467
+ statFile .KnownIssues [i ].Example = cleanStringForLiteralYaml (statFile .KnownIssues [i ].Example )
468
+ }
469
+
470
+ f , err := os .Create (path )
471
+ if err != nil {
472
+ return fmt .Errorf ("failed to create file for write stat: %w" , err )
473
+ }
474
+ defer f .Close ()
475
+ encoder := yaml .NewEncoder (f )
476
+ if err = encoder .Encode (& statFile ); err != nil {
477
+ return fmt .Errorf ("failed to write stat: %w" , err )
478
+ }
479
+ return nil
480
+ }
481
+
482
// cleanStringForLiteralYaml rewrites s so that every character passes
// isYamlPrintable.
//
// Trailing spaces are removed from every line, and each character that
// isYamlPrintable rejects (for example a tab, a byte order mark or a stray
// byte of a broken UTF-8 sequence) is replaced by a single 'X'.
func cleanStringForLiteralYaml(s string) string {
	lines := strings.Split(s, "\n")
	for i, line := range lines {
		lines[i] = strings.TrimRight(line, " ")
	}
	s = strings.Join(lines, "\n")

	sBytes := []byte(s)
	var buf strings.Builder
	buf.Grow(len(s))

	// Ranging over the string yields the byte offset of every rune, which is
	// what isYamlPrintable inspects.
	for i, r := range s {
		if isYamlPrintable(sBytes, i) {
			buf.WriteRune(r)
		} else {
			buf.WriteByte('X')
		}
	}

	return buf.String()
}

// isYamlPrintable reports whether the UTF-8 sequence starting at b[i] is
// accepted by the byte-level check below, which follows yaml's is_printable.
//
// Unlike a plain copy of that check, it never reads past the end of b: an
// offset outside b, or a sequence cut off before the bytes needed to classify
// it, is reported as not printable.
func isYamlPrintable(b []byte, i int) bool {
	if i < 0 || i >= len(b) {
		return false
	}

	rest := b[i:]
	lead := rest[0]

	switch {
	case lead == 0x0A: // . == #x0A
		return true
	case lead >= 0x20 && lead <= 0x7E: // #x20 <= . <= #x7E
		return true
	case lead == 0xC2: // #xA0 <= . ; needs the second byte
		return len(rest) > 1 && rest[1] >= 0xA0
	case lead > 0xC2 && lead < 0xED: // up to #xCFFF
		return true
	case lead == 0xED: // . <= #xD7FF ; needs the second byte
		return len(rest) > 1 && rest[1] < 0xA0
	case lead == 0xEE:
		return true
	case lead == 0xEF: // #xE000 <= . <= #xFFFD, except #xFEFF
		if len(rest) < 3 {
			return false
		}
		isBOM := rest[1] == 0xBB && rest[2] == 0xBF
		isNonCharacter := rest[1] == 0xBF && (rest[2] == 0xBE || rest[2] == 0xBF)
		return !isBOM && !isNonCharacter
	}

	return false
}
0 commit comments