@@ -4,6 +4,7 @@ Copyright © 2024 NAME HERE <EMAIL ADDRESS>
4
4
package cmd
5
5
6
6
import (
7
+ "compress/gzip"
7
8
"context"
8
9
"encoding/json"
9
10
"errors"
@@ -16,6 +17,7 @@ import (
16
17
"slices"
17
18
"strings"
18
19
"sync"
20
+ "sync/atomic"
19
21
"time"
20
22
21
23
"github.com/spf13/cobra"
@@ -29,6 +31,7 @@ import (
29
31
var checkPgQueriesConfig struct {
30
32
schemeDumpFile string
31
33
sessionsLog string
34
+ sessionsLogNeedSort bool
32
35
includeFailed bool
33
36
ydbConnectionString string
34
37
limitRequests int
@@ -41,6 +44,7 @@ var checkPgQueriesConfig struct {
41
44
printStats bool
42
45
printProgressEveryQueries int
43
46
writeStatPath string
47
+ writeStatEveryItems int
44
48
checkersCount int
45
49
}
46
50
@@ -49,6 +53,7 @@ func init() {
49
53
50
54
checkPgQueriesCmd .PersistentFlags ().StringVar (& checkPgQueriesConfig .schemeDumpFile , "schemedump-file" , "" , "Path to dump of db schema. Set empty for skip read schema." )
51
55
checkPgQueriesCmd .PersistentFlags ().StringVar (& checkPgQueriesConfig .sessionsLog , "query-log" , "" , "Set path to input sessions log" )
56
+ checkPgQueriesCmd .PersistentFlags ().BoolVar (& checkPgQueriesConfig .sessionsLogNeedSort , "query-log-need-sort" , false , "Sort query log in memory before start" )
52
57
must0 (checkPgQueriesCmd .MarkPersistentFlagRequired ("query-log" ))
53
58
54
59
checkPgQueriesCmd .PersistentFlags ().BoolVar (& checkPgQueriesConfig .includeFailed , "include-failed" , false , "Extract sessions with failed transactions" )
@@ -61,8 +66,10 @@ func init() {
61
66
checkPgQueriesCmd .PersistentFlags ().BoolVar (& checkPgQueriesConfig .printQueryForKnownIssue , "print-query-for-known-issues" , true , "Print query for known issues" )
62
67
checkPgQueriesCmd .PersistentFlags ().BoolVar (& checkPgQueriesConfig .printErrorsInProgress , "print-progress" , false , "Print queries in progress" )
63
68
checkPgQueriesCmd .PersistentFlags ().BoolVar (& checkPgQueriesConfig .printStats , "print-stats" , true , "Print queries in progress" )
64
- checkPgQueriesCmd .PersistentFlags ().IntVar (& checkPgQueriesConfig .printProgressEveryQueries , "print-progress-every-queries" , 10 , "Periodically print progress" )
69
+ checkPgQueriesCmd .PersistentFlags ().IntVar (& checkPgQueriesConfig .printProgressEveryQueries , "print-progress-every-queries" , 100 , "Periodically print progress" )
65
70
checkPgQueriesCmd .PersistentFlags ().StringVar (& checkPgQueriesConfig .writeStatPath , "write-stat-file" , "" , "Path to write full stat file if need. Will write example of queries" )
71
+ checkPgQueriesCmd .PersistentFlags ().IntVar (& checkPgQueriesConfig .writeStatEveryItems , "write-stat-every-items" , 10000 , "Interval for write current stat" )
72
+
66
73
checkPgQueriesCmd .PersistentFlags ().IntVar (& checkPgQueriesConfig .checkersCount , "check-queries-parallel" , 5 , "How many queries may be checked in parallel" )
67
74
}
68
75
@@ -105,8 +112,13 @@ var checkPgQueriesCmd = &cobra.Command{
105
112
))
106
113
cancel ()
107
114
108
- sessions := readSessions ()
109
- queries := extractQueries (sessions )
115
+ var queries <- chan string
116
+ fileReader := openFileReader ()
117
+ if checkPgQueriesConfig .sessionsLogNeedSort {
118
+ queries = generateQueriesFromUnsortedSessions (fileReader )
119
+ } else {
120
+ queries = readSortedQueries (fileReader )
121
+ }
110
122
111
123
log .Println ("Start check queries" )
112
124
var stats QueryStats
@@ -127,8 +139,105 @@ var checkPgQueriesCmd = &cobra.Command{
127
139
},
128
140
}
129
141
130
- func readSessions () []internal.Session {
131
- reader := must (os .Open (checkPgQueriesConfig .sessionsLog ))
142
+ func openFileReader () io.ReadCloser {
143
+ filepath := checkPgQueriesConfig .sessionsLog
144
+ fileReader , err := os .Open (filepath )
145
+ if err != nil {
146
+ log .Fatalf ("Failed to open file %q: %v" , filepath , err )
147
+ }
148
+
149
+ if strings .HasSuffix (strings .ToLower (filepath ), ".gz" ) {
150
+ gzipReader , err := gzip .NewReader (fileReader )
151
+ if err != nil {
152
+ log .Fatalf ("Failed to start gzip reader for %q: %v" , filepath , err )
153
+ }
154
+ return gzipReaderClose {
155
+ gzipReader : gzipReader ,
156
+ fileReader : fileReader ,
157
+ }
158
+ }
159
+
160
+ return fileReader
161
+ }
162
+
163
// gzipReaderClose adapts a gzip stream backed by a file to io.ReadCloser.
// Closing a gzip.Reader does not close the reader it decompresses from, so the
// wrapper keeps the file handle too and releases both in Close.
type gzipReaderClose struct {
	gzipReader *gzip.Reader // source of decompressed bytes
	fileReader *os.File     // file handle that must be closed together with gzipReader
}

// Read returns decompressed bytes from the gzip stream.
func (g gzipReaderClose) Read(p []byte) (n int, err error) {
	return g.gzipReader.Read(p)
}

// Close closes the gzip reader and then the file. Both closes are always
// attempted. The result is nil only when both succeed; otherwise it wraps
// every non-nil error, so errors.Is / errors.As can find either of them
// (previously the file error was dropped whenever the gzip close failed).
func (g gzipReaderClose) Close() error {
	gzipCloseErr := g.gzipReader.Close()
	fileCloseErr := g.fileReader.Close()

	return errors.Join(gzipCloseErr, fileCloseErr)
}
182
+
183
+ func readSortedQueries (reader io.ReadCloser ) <- chan string {
184
+ queries := make (chan string )
185
+ go func () {
186
+ defer reader .Close ()
187
+ defer close (queries )
188
+
189
+ decoder := json .NewDecoder (reader )
190
+ limitCount := checkPgQueriesConfig .limitRequests
191
+ counter := 0
192
+
193
+ needDeleteLine := false
194
+ for {
195
+ if limitCount > 0 && counter >= limitCount {
196
+ log .Println ("Count limit reached" )
197
+ return
198
+ }
199
+
200
+ var item internal.SessionLogRecord
201
+ if err := decoder .Decode (& item ); err != nil {
202
+ switch {
203
+ case errors .Is (err , io .EOF ):
204
+ log .Printf ("Read file completed, read items: %v" , counter )
205
+ return
206
+ case err != nil :
207
+ log .Printf ("Failed to decode item %v: %v" , counter , err )
208
+ return
209
+ default :
210
+ // pass
211
+ }
212
+ }
213
+
214
+ queries <- item .Query
215
+ counter ++
216
+ if counter % checkPgQueriesConfig .printProgressEveryQueries == 0 {
217
+ if needDeleteLine {
218
+ printDeleteLine ()
219
+ } else {
220
+ needDeleteLine = true
221
+ }
222
+
223
+ var percent float64
224
+ if limitCount > 0 {
225
+ percent = float64 (counter ) / float64 (limitCount ) * 100
226
+ }
227
+ log .Printf ("Read items %v/%v (%0.2f)" , counter , limitCount , percent )
228
+ }
229
+ }
230
+ }()
231
+
232
+ return queries
233
+ }
234
+
235
+ func generateQueriesFromUnsortedSessions (reader io.ReadCloser ) <- chan string {
236
+ sessions := readSessions (reader )
237
+ return extractQueries (sessions )
238
+ }
239
+
240
+ func readSessions (reader io.ReadCloser ) []internal.Session {
132
241
defer reader .Close ()
133
242
134
243
decoder := json .NewDecoder (reader )
@@ -241,7 +350,7 @@ func extractQueries(sessions []internal.Session) <-chan string {
241
350
if queryIndex % checkPgQueriesConfig .printProgressEveryQueries == 0 {
242
351
percent := float64 (queryIndex ) / float64 (totalQueries ) * 100
243
352
if needRemoveLine {
244
- fmt . Printf ( " \033 [1A \033 [K" )
353
+ printDeleteLine ( )
245
354
} else {
246
355
needRemoveLine = true
247
356
}
@@ -258,18 +367,30 @@ func extractQueries(sessions []internal.Session) <-chan string {
258
367
return queries
259
368
}
260
369
370
// printDeleteLine writes a fixed terminal control string to standard output.
// It is called right before a new progress line is logged, once an earlier
// progress line has already been printed.
// NOTE(review): presumably ANSI "cursor up one line" + "erase line", so the new
// progress line overwrites the previous one — confirm against the real literal.
+ func printDeleteLine () {
371
+ fmt .Printf ("\033 [1A\033 [K" )
372
+ }
373
+
261
374
func checkQueries (rules Rules , stats * QueryStats , db * ydb.Driver , queries <- chan string ) {
262
375
if checkPgQueriesConfig .checkersCount < 1 {
263
376
log .Fatalf ("can't start less then 1 checker, got: %v" , checkPgQueriesConfig .checkersCount )
264
377
}
265
378
379
+ var itemsCounter atomic.Int64
380
+ writeStatEveryItems := int64 (checkPgQueriesConfig .writeStatEveryItems )
266
381
var wg sync.WaitGroup
267
382
for range checkPgQueriesConfig .checkersCount {
268
383
wg .Add (1 )
269
384
go func () {
270
385
defer wg .Done ()
271
386
for q := range queries {
272
387
checkQuery (stats , rules , db , q )
388
+ counter := itemsCounter .Add (1 )
389
+ if counter % writeStatEveryItems == 0 && checkPgQueriesConfig .writeStatPath != "" {
390
+ if err := stats .SaveToFile (checkPgQueriesConfig .writeStatPath ); err != nil {
391
+ log .Printf ("Stat file written failed %q: %v" , checkPgQueriesConfig .writeStatPath , err )
392
+ }
393
+ }
273
394
}
274
395
}()
275
396
}
@@ -356,12 +477,14 @@ func cutGreenplumSpecific(q string) string {
356
477
357
478
// Precompiled patterns for CREATE TABLE statements carrying a DISTRIBUTED BY clause.
//   - createAndDistributedByWithBrackets: CREATE ... TABLE ... AS ( body ) DISTRIBUTED BY (...),
//     case-insensitive with "." also matching newlines; the bracketed body is capture group 1.
//   - createTableAsSelect: "create [temporary] table ... as", case-insensitive; the added variant
//     also lets "." match newlines.
//   - distributedBy: a "DISTRIBUTED BY (...)" clause, case-insensitive.
// NOTE(review): presumably used by cutGreenplumSpecific — confirm against its body.
var (
358
479
createAndDistributedByWithBrackets = regexp .MustCompile (`(?is)CREATE\s+.*\sTABLE\s+.*\s+AS\s+\(\s*(.*)\s*\)\s+DISTRIBUTED\s+BY\s\(.*\)` )
359
- createTableAsSelect = regexp .MustCompile (`(?i )create\s+(temporary\s+)?table .* as` )
480
+ createTableAsSelect = regexp .MustCompile (`(?is )create\s+(temporary\s+)?table .* as` )
360
481
distributedBy = regexp .MustCompile (`(?i)DISTRIBUTED BY \(.*\)` )
361
482
)
362
483
363
484
type QueryStats struct {
364
- m sync.Mutex
485
+ m sync.RWMutex
486
+ writeStatMutex sync.Mutex
487
+
365
488
OkCount int
366
489
TotalCount int
367
490
@@ -370,9 +493,13 @@ type QueryStats struct {
370
493
}
371
494
372
495
func (s * QueryStats ) GetOkPercent () float64 {
373
- s .m .Lock ()
374
- defer s .m .Unlock ()
496
+ s .m .RLock ()
497
+ defer s .m .RUnlock ()
498
+
499
+ return s .getOkPercentNeedLock ()
500
+ }
375
501
502
+ func (s * QueryStats ) getOkPercentNeedLock () float64 {
376
503
return float64 (s .OkCount ) / float64 (s .TotalCount ) * 100
377
504
}
378
505
@@ -434,16 +561,24 @@ func (s *QueryStats) CountAsUnknown(reason string, query string) {
434
561
}
435
562
436
563
func (s * QueryStats ) GetTopKnown (count int ) []CounterWithExample [string ] {
437
- s .m .Lock ()
438
- defer s .m .Unlock ()
564
+ s .m .RLock ()
565
+ defer s .m .RUnlock ()
439
566
567
+ return s .getTopKnownNeedLock (count )
568
+ }
569
+
570
+ func (s * QueryStats ) getTopKnownNeedLock (count int ) []CounterWithExample [string ] {
440
571
return getTopCounter (s .MatchToRules , count )
441
572
}
442
573
443
574
func (s * QueryStats ) GetTopUnknown (count int ) []CounterWithExample [string ] {
444
- s .m .Lock ()
445
- defer s .m .Unlock ()
575
+ s .m .RLock ()
576
+ defer s .m .RUnlock ()
446
577
578
+ return s .getTopUnknownNeedLock (count )
579
+ }
580
+
581
+ func (s * QueryStats ) getTopUnknownNeedLock (count int ) []CounterWithExample [string ] {
447
582
return getTopCounter (s .UnknownProblems , count )
448
583
}
449
584
@@ -497,6 +632,12 @@ func getTopCounter[K comparable](m map[K]*CounterWithExample[K], count int) []Co
497
632
}
498
633
499
634
func (s * QueryStats ) SaveToFile (path string ) error {
635
+ s .writeStatMutex .Lock ()
636
+ defer s .writeStatMutex .Unlock ()
637
+
638
+ s .m .RLock ()
639
+ defer s .m .RUnlock ()
640
+
500
641
var statFile struct {
501
642
TotalCount int `yaml:"total_count"`
502
643
OkCount int `yaml:"ok_count"`
@@ -507,9 +648,9 @@ func (s *QueryStats) SaveToFile(path string) error {
507
648
508
649
statFile .TotalCount = s .TotalCount
509
650
statFile .OkCount = s .OkCount
510
- statFile .OkPercent = s .GetOkPercent ()
511
- statFile .UnknownIssues = s .GetTopUnknown (math .MaxInt )
512
- statFile .KnownIssues = s .GetTopKnown (math .MaxInt )
651
+ statFile .OkPercent = s .getOkPercentNeedLock ()
652
+ statFile .UnknownIssues = s .getTopUnknownNeedLock (math .MaxInt )
653
+ statFile .KnownIssues = s .getTopKnownNeedLock (math .MaxInt )
513
654
514
655
for i := range statFile .UnknownIssues {
515
656
statFile .UnknownIssues [i ].Example = cleanStringForLiteralYaml (statFile .UnknownIssues [i ].Example )
0 commit comments