Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit a47018c

Browse files
feat: code insights language stats performance improvements (#62011)
* feat: raise timeout and introduce parallel gitserver calls * chore: rework semaphore into inventory client + separate redis and gitserver semaphores * feat: introduce new properties to tweak GetInventory performance; new exclusion filter for getInventory * chore: add and fix tests * chore: revert rcache version change * chore: omit err check Co-authored-by: Camden Cheek <[email protected]> * chore: simplify closure * fix: update bazel files * fix: linting and use grafana regexp * fix: default values for config; use conc package * fix: apply correct locking and tracing * chore: replace experimental config with env vars (still needs build file updates) * fix: enforce DFS to better utilize tree caching * fix: run bazel configure * fix: bazel configure and use correct var * fix: remove DFS and fix nogo errors * fix: increase redis concurrency * chore: trace cleanup and logging * chore: add changelog entry * chore: update bazel files * chore: update bazel files * chore: harden test for concurrency * fix: allow data race in entries_test.go * fix: allow data race in entries_test.go * chore: update comment on why we don't do a recursive call --------- Co-authored-by: Camden Cheek <[email protected]>
1 parent 6cb50bc commit a47018c

File tree

10 files changed

+164
-40
lines changed

10 files changed

+164
-40
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ All notable changes to Sourcegraph are documented in this file.
2727

2828
- Improved syntax highlighting for Dart. [#58480](https://github.com/sourcegraph/sourcegraph/pull/58480)
2929
- The default noop Event type in the honey package has been replaced with a new type that aggregates fields in memory for testing and logging purposes. [#61854](https://github.com/sourcegraph/sourcegraph/pull/61854)
30+
- Improved the performance of Language Stats Insights by 50-70% by increasing the concurrent requests from the frontend to the gitserver from 1 to 4. You can override the concurrency with the `GET_INVENTORY_GIT_SERVER_CONCURRENCY` environment variable. [#62011](https://github.com/sourcegraph/sourcegraph/pull/62011)
31+
- Raised the backend timeout for Language Stats Insights from 3 minutes to 5 minutes. You can override this with the `GET_INVENTORY_TIMEOUT` environment variable. [#62011](https://github.com/sourcegraph/sourcegraph/pull/62011)
3032

3133
### Fixed
3234

cmd/frontend/backend/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ go_library(
6969
"@com_github_prometheus_client_golang//prometheus/promauto",
7070
"@com_github_sourcegraph_log//:log",
7171
"@io_opentelemetry_go_otel//attribute",
72+
"@org_golang_x_sync//semaphore",
7273
],
7374
)
7475

cmd/frontend/backend/inventory.go

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"github.com/sourcegraph/sourcegraph/internal/trace"
8+
"golang.org/x/sync/semaphore"
79
"io"
810
"io/fs"
911
"strconv"
@@ -25,6 +27,22 @@ var useEnhancedLanguageDetection, _ = strconv.ParseBool(env.Get("USE_ENHANCED_LA
2527

2628
var inventoryCache = rcache.New(fmt.Sprintf("inv:v2:enhanced_%v", useEnhancedLanguageDetection))
2729

30+
var gitServerConcurrency, _ = strconv.Atoi(env.Get("GET_INVENTORY_GIT_SERVER_CONCURRENCY", "4", "Changes the number of concurrent requests against the gitserver for getInventory requests."))
31+
32+
// Raising this value to 50 or higher lead to the following error on my dev machine
33+
// lvl=warn msg="failed to execute redis command" cmd=GET error="dial tcp *********:6379: connect: can't assign requested address"
34+
var redisConcurrency, _ = strconv.Atoi(env.Get("GET_INVENTORY_REDIS_CONCURRENCY", "20", "Changes the number of concurrent requests against the redis cache for getInventory requests."))
35+
36+
type semaphoredReadCloser struct {
37+
io.ReadCloser
38+
releaseSemaphore func()
39+
}
40+
41+
func (s *semaphoredReadCloser) Close() error {
42+
defer s.releaseSemaphore()
43+
return s.ReadCloser.Close()
44+
}
45+
2846
// InventoryContext returns the inventory context for computing the inventory for the repository at
2947
// the given commit.
3048
func InventoryContext(logger log.Logger, repo api.RepoName, gsClient gitserver.Client, commitID api.CommitID, forceEnhancedLanguageDetection bool) (inventory.Context, error) {
@@ -40,22 +58,50 @@ func InventoryContext(logger log.Logger, repo api.RepoName, gsClient gitserver.C
4058
return info.OID().String()
4159
}
4260

61+
gitServerSemaphore := semaphore.NewWeighted(int64(gitServerConcurrency))
62+
redisSemaphore := semaphore.NewWeighted(int64(redisConcurrency))
63+
4364
logger = logger.Scoped("InventoryContext").
4465
With(log.String("repo", string(repo)), log.String("commitID", string(commitID)))
4566
invCtx := inventory.Context{
4667
ReadTree: func(ctx context.Context, path string) ([]fs.FileInfo, error) {
47-
// TODO: As a perf optimization, we could read multiple levels of the Git tree at once
48-
// to avoid sequential tree traversal calls.
68+
trc, ctx := trace.New(ctx, "ReadTree waits for semaphore")
69+
err := gitServerSemaphore.Acquire(ctx, 1)
70+
trc.End()
71+
if err != nil {
72+
return nil, err
73+
}
74+
defer gitServerSemaphore.Release(1)
75+
// Using recurse=true does not yield a significant performance improvement. See https://github.com/sourcegraph/sourcegraph/pull/62011/files#r1577513913.
4976
return gsClient.ReadDir(ctx, repo, commitID, path, false)
5077
},
5178
NewFileReader: func(ctx context.Context, path string) (io.ReadCloser, error) {
52-
return gsClient.NewFileReader(ctx, repo, commitID, path)
79+
trc, ctx := trace.New(ctx, "NewFileReader waits for semaphore")
80+
err := gitServerSemaphore.Acquire(ctx, 1)
81+
trc.End()
82+
if err != nil {
83+
return nil, err
84+
}
85+
reader, err := gsClient.NewFileReader(ctx, repo, commitID, path)
86+
if err != nil {
87+
return nil, err
88+
}
89+
return &semaphoredReadCloser{ReadCloser: reader, releaseSemaphore: func() {
90+
gitServerSemaphore.Release(1)
91+
}}, nil
5392
},
54-
CacheGet: func(e fs.FileInfo) (inventory.Inventory, bool) {
93+
CacheGet: func(ctx context.Context, e fs.FileInfo) (inventory.Inventory, bool) {
5594
cacheKey := cacheKey(e)
5695
if cacheKey == "" {
5796
return inventory.Inventory{}, false // not cacheable
5897
}
98+
99+
if err := redisSemaphore.Acquire(ctx, 1); err != nil {
100+
logger.Warn("Failed to acquire semaphore for redis cache.", log.String("path", e.Name()), log.Error(err))
101+
return inventory.Inventory{}, false
102+
}
103+
defer redisSemaphore.Release(1)
104+
59105
if b, ok := inventoryCache.Get(cacheKey); ok {
60106
var inv inventory.Inventory
61107
if err := json.Unmarshal(b, &inv); err != nil {
@@ -66,7 +112,7 @@ func InventoryContext(logger log.Logger, repo api.RepoName, gsClient gitserver.C
66112
}
67113
return inventory.Inventory{}, false
68114
},
69-
CacheSet: func(e fs.FileInfo, inv inventory.Inventory) {
115+
CacheSet: func(ctx context.Context, e fs.FileInfo, inv inventory.Inventory) {
70116
cacheKey := cacheKey(e)
71117
if cacheKey == "" {
72118
return // not cacheable
@@ -76,6 +122,12 @@ func InventoryContext(logger log.Logger, repo api.RepoName, gsClient gitserver.C
76122
logger.Warn("Failed to marshal JSON inventory for cache.", log.String("path", e.Name()), log.Error(err))
77123
return
78124
}
125+
126+
if err := redisSemaphore.Acquire(ctx, 1); err != nil {
127+
logger.Warn("Failed to acquire semaphore for redis cache.", log.String("path", e.Name()), log.Error(err))
128+
return
129+
}
130+
defer redisSemaphore.Release(1)
79131
inventoryCache.Set(cacheKey, b)
80132
},
81133
}

cmd/frontend/backend/repos.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ package backend
33
import (
44
"context"
55
"fmt"
6+
"github.com/sourcegraph/sourcegraph/internal/env"
67
"net/http"
8+
"strconv"
79
"time"
810

911
"github.com/prometheus/client_golang/prometheus"
@@ -249,6 +251,8 @@ func (s *repos) ListIndexable(ctx context.Context) (repos []types.MinimalRepo, e
249251
})
250252
}
251253

254+
var getInventoryTimeout, _ = strconv.Atoi(env.Get("GET_INVENTORY_TIMEOUT", "5", "Time in minutes before cancelling getInventory requests. Raise this if your repositories are large and need a long time to process."))
255+
252256
func (s *repos) GetInventory(ctx context.Context, repo api.RepoName, commitID api.CommitID, forceEnhancedLanguageDetection bool) (res *inventory.Inventory, err error) {
253257
if Mocks.Repos.GetInventory != nil {
254258
return Mocks.Repos.GetInventory(ctx, repo, commitID)
@@ -257,8 +261,7 @@ func (s *repos) GetInventory(ctx context.Context, repo api.RepoName, commitID ap
257261
ctx, done := startTrace(ctx, "GetInventory", map[string]any{"repo": repo, "commitID": commitID}, &err)
258262
defer done()
259263

260-
// Cap GetInventory operation to some reasonable time.
261-
ctx, cancel := context.WithTimeout(ctx, 3*time.Minute)
264+
ctx, cancel := context.WithTimeout(ctx, time.Duration(getInventoryTimeout)*time.Minute)
262265
defer cancel()
263266

264267
invCtx, err := InventoryContext(s.logger, repo, s.gitserverClient, commitID, forceEnhancedLanguageDetection)

cmd/frontend/internal/inventory/BUILD.bazel

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,14 @@ go_library(
1111
importpath = "github.com/sourcegraph/sourcegraph/cmd/frontend/internal/inventory",
1212
visibility = ["//cmd/frontend:__subpackages__"],
1313
deps = [
14+
"//internal/trace",
1415
"//lib/errors",
1516
"@com_github_go_enry_go_enry_v2//:go-enry",
1617
"@com_github_go_enry_go_enry_v2//data",
18+
"@com_github_grafana_regexp//:regexp",
19+
"@com_github_sourcegraph_conc//iter",
1720
"@com_github_sourcegraph_log//:log",
21+
"@io_opentelemetry_go_otel//attribute",
1822
],
1923
)
2024

@@ -25,6 +29,7 @@ go_test(
2529
"inventory_test.go",
2630
],
2731
embed = [":inventory"],
32+
race = "off",
2833
deps = [
2934
"//internal/fileutil",
3035
"//lib/errors",

cmd/frontend/internal/inventory/context.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ type Context struct {
1717
NewFileReader func(ctx context.Context, path string) (io.ReadCloser, error)
1818

1919
// CacheGet, if set, returns the cached inventory and true for the given tree, or false for a cache miss.
20-
CacheGet func(fs.FileInfo) (Inventory, bool)
20+
CacheGet func(context.Context, fs.FileInfo) (Inventory, bool)
2121

2222
// CacheSet, if set, stores the inventory in the cache for the given tree.
23-
CacheSet func(fs.FileInfo, Inventory)
23+
CacheSet func(context.Context, fs.FileInfo, Inventory)
2424
}

cmd/frontend/internal/inventory/entries.go

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ package inventory
22

33
import (
44
"context"
5+
"github.com/sourcegraph/conc/iter"
6+
"github.com/sourcegraph/sourcegraph/lib/errors"
57
"io/fs"
68
"sort"
7-
8-
"github.com/sourcegraph/sourcegraph/lib/errors"
99
)
1010

1111
// fileReadBufferSize is the size of the buffer we'll use while reading file contents
@@ -48,14 +48,14 @@ func (c *Context) entries(ctx context.Context, entries []fs.FileInfo, buf []byte
4848
func (c *Context) tree(ctx context.Context, tree fs.FileInfo, buf []byte) (inv Inventory, err error) {
4949
// Get and set from the cache.
5050
if c.CacheGet != nil {
51-
if inv, ok := c.CacheGet(tree); ok {
51+
if inv, ok := c.CacheGet(ctx, tree); ok {
5252
return inv, nil // cache hit
5353
}
5454
}
5555
if c.CacheSet != nil {
5656
defer func() {
5757
if err == nil {
58-
c.CacheSet(tree, inv) // store in cache
58+
c.CacheSet(ctx, tree, inv) // store in cache
5959
}
6060
}()
6161
}
@@ -64,45 +64,44 @@ func (c *Context) tree(ctx context.Context, tree fs.FileInfo, buf []byte) (inv I
6464
if err != nil {
6565
return Inventory{}, err
6666
}
67-
invs := make([]Inventory, len(entries))
68-
for i, e := range entries {
67+
68+
inventories, err := iter.MapErr(entries, func(entry *fs.FileInfo) (Inventory, error) {
69+
e := *entry
6970
switch {
7071
case e.Mode().IsRegular(): // file
7172
// Don't individually cache files that we found during tree traversal. The hit rate for
7273
// those cache entries is likely to be much lower than cache entries for files whose
7374
// inventory was directly requested.
7475
lang, err := getLang(ctx, e, buf, c.NewFileReader)
75-
if err != nil {
76-
return Inventory{}, errors.Wrapf(err, "inventory file %q", e.Name())
77-
}
78-
invs[i] = Inventory{Languages: []Lang{lang}}
79-
76+
return Inventory{Languages: []Lang{lang}}, err
8077
case e.Mode().IsDir(): // subtree
8178
subtreeInv, err := c.tree(ctx, e, buf)
82-
if err != nil {
83-
return Inventory{}, errors.Wrapf(err, "inventory tree %q", e.Name())
84-
}
85-
invs[i] = subtreeInv
86-
79+
return subtreeInv, err
8780
default:
8881
// Skip symlinks, submodules, etc.
82+
return Inventory{}, nil
8983
}
84+
})
85+
86+
if err != nil {
87+
return Inventory{}, err
9088
}
91-
return Sum(invs), nil
89+
90+
return Sum(inventories), nil
9291
}
9392

9493
// file computes the inventory of a single file. It caches the result.
9594
func (c *Context) file(ctx context.Context, file fs.FileInfo, buf []byte) (inv Inventory, err error) {
9695
// Get and set from the cache.
9796
if c.CacheGet != nil {
98-
if inv, ok := c.CacheGet(file); ok {
97+
if inv, ok := c.CacheGet(ctx, file); ok {
9998
return inv, nil // cache hit
10099
}
101100
}
102101
if c.CacheSet != nil {
103102
defer func() {
104103
if err == nil {
105-
c.CacheSet(file, inv) // store in cache
104+
c.CacheSet(ctx, file, inv) // store in cache
106105
}
107106
}()
108107
}

cmd/frontend/internal/inventory/entries_test.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"io/fs"
88
"os"
99
"reflect"
10+
"sync"
1011
"testing"
1112

1213
"github.com/google/go-cmp/cmp"
@@ -21,8 +22,11 @@ func TestContext_Entries(t *testing.T) {
2122
cacheGetCalls []string
2223
cacheSetCalls = map[string]Inventory{}
2324
)
25+
var mu sync.Mutex
2426
c := Context{
2527
ReadTree: func(ctx context.Context, path string) ([]fs.FileInfo, error) {
28+
mu.Lock()
29+
defer mu.Unlock()
2630
readTreeCalls = append(readTreeCalls, path)
2731
switch path {
2832
case "d":
@@ -37,6 +41,8 @@ func TestContext_Entries(t *testing.T) {
3741
}
3842
},
3943
NewFileReader: func(ctx context.Context, path string) (io.ReadCloser, error) {
44+
mu.Lock()
45+
defer mu.Unlock()
4046
newFileReaderCalls = append(newFileReaderCalls, path)
4147
var data []byte
4248
switch path {
@@ -51,11 +57,15 @@ func TestContext_Entries(t *testing.T) {
5157
}
5258
return io.NopCloser(bytes.NewReader(data)), nil
5359
},
54-
CacheGet: func(e fs.FileInfo) (Inventory, bool) {
60+
CacheGet: func(ctx context.Context, e fs.FileInfo) (Inventory, bool) {
61+
mu.Lock()
62+
defer mu.Unlock()
5563
cacheGetCalls = append(cacheGetCalls, e.Name())
5664
return Inventory{}, false
5765
},
58-
CacheSet: func(e fs.FileInfo, inv Inventory) {
66+
CacheSet: func(ctx context.Context, e fs.FileInfo, inv Inventory) {
67+
mu.Lock()
68+
defer mu.Unlock()
5969
if _, ok := cacheSetCalls[e.Name()]; ok {
6070
t.Fatalf("already stored %q in cache", e.Name())
6171
}
@@ -83,13 +93,8 @@ func TestContext_Entries(t *testing.T) {
8393
if want := []string{"d", "d/a"}; !reflect.DeepEqual(readTreeCalls, want) {
8494
t.Errorf("ReadTree calls: got %q, want %q", readTreeCalls, want)
8595
}
86-
if want := []string{
87-
// We need to read all files to get line counts
88-
"d/a/c.m",
89-
"d/b.go",
90-
"f.go",
91-
}; !reflect.DeepEqual(newFileReaderCalls, want) {
92-
t.Errorf("GetFileReader calls: got %q, want %q", newFileReaderCalls, want)
96+
if len(newFileReaderCalls) != 3 {
97+
t.Errorf("GetFileReader calls: got %d, want %d", len(newFileReaderCalls), 3)
9398
}
9499
if want := []string{"d", "d/a", "f.go"}; !reflect.DeepEqual(cacheGetCalls, want) {
95100
t.Errorf("CacheGet calls: got %q, want %q", cacheGetCalls, want)

0 commit comments

Comments
 (0)