Skip to content

Commit 00ee8fb

Browse files
authored
[perf] cache nix.searchSystem (#1546)
## Summary From observing times for adding and removing packages, I notice that a lot of time is spent in `syncPackagesToProfile` and most of that is from `nix.Search`, which is a fairly slow operation. However, from observing the results, it seems that these should be constant. For example, here are some search results keyed by the queries: https://gist.github.com/savil/584c4d9bd6c468aaa8c289265287a23a Correct me if wrong: all these seem to be constants that won't change, because the queries we run have the nixpkgs-commit-hash in them. The general [`nix search` command](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-search.html) seems to be slower since the query it accepts is an `installable regex`, which can match a large number of concrete queries. Furthermore, it needs to accept queries like`nixpkgs#vim` and figure out which nixpkgs commit hash this would resolve to. In our case, our queries are much more specific being of the form `nixpkgs/<commit>#attribute` ## How was it tested? This effect is more pronounced for packages that are not using store-paths. This will affect all users who are still on nix < 2.17, which is a fairly large percentage for now. I have a devbox global of about 43 packages. For the times below, I ran `devbox global add vim` (where vim is already in my /nix/store), and did `export DEVBOX_FEATURE_REMOVE_NIXPKGS=0` BEFORE: syncPackagesToProfile takes about 1 min 30+ seconds AFTER: syncPackagesToProfile takes 500ms
1 parent fd13459 commit 00ee8fb

File tree

6 files changed

+172
-15
lines changed

6 files changed

+172
-15
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ require (
3939
github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a
4040
github.com/wk8/go-ordered-map/v2 v2.1.8
4141
github.com/zealic/go2node v0.1.0
42-
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e
42+
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8
4343
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17
4444
golang.org/x/mod v0.12.0
4545
golang.org/x/sync v0.3.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@ github.com/yuin/gopher-lua v0.0.0-20190514113301-1cd887cd7036/go.mod h1:gqRgreBU
339339
github.com/zaffka/mongodb-boltdb-mock v0.0.0-20221014194232-b4bb03fbe3a0/go.mod h1:GsDD1qsG+86MeeCG7ndi6Ei3iGthKL3wQ7PTFigDfNY=
340340
github.com/zealic/go2node v0.1.0 h1:ofxpve08cmLJBwFdI0lPCk9jfwGWOSD+s6216x0oAaA=
341341
github.com/zealic/go2node v0.1.0/go.mod h1:GrkFr+HctXwP7vzcU9RsgtAeJjTQ6Ud0IPCQAqpTfBg=
342-
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e h1:GtqFrE5uUf6rXFk3zaIqIfLfykrqrB8gp6bDmOfH+2s=
343-
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e/go.mod h1:m49CAcLbpZttEbzoEWC1SKD+/z5mEiFdw0dQxY6AM5Q=
342+
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8 h1:xzAAUVfg9uqyJhZVc+ZDTLHtFlMhj/5St5JnVfDxbpk=
343+
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8/go.mod h1:m49CAcLbpZttEbzoEWC1SKD+/z5mEiFdw0dQxY6AM5Q=
344344
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
345345
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
346346
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=

internal/devpkg/package.go

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ func (p *Package) NormalizedPackageAttributePath() (string, error) {
285285
}
286286

287287
// normalizePackageAttributePath calls nix search to find the normalized attribute
288-
// path. It is an expensive call (~100ms).
288+
// path. It may be an expensive call (~100ms).
289289
func (p *Package) normalizePackageAttributePath() (string, error) {
290290
var query string
291291
if p.IsDevboxPackage() {
@@ -302,11 +302,24 @@ func (p *Package) normalizePackageAttributePath() (string, error) {
302302
query = p.String()
303303
}
304304

305-
// We prefer search over just trying to parse the URL because search will
306-
// guarantee that the package exists for the current system.
307-
infos, err := nix.Search(query)
308-
if err != nil {
309-
return "", err
305+
// We prefer nix.Search over just trying to parse the package's "URL" because
306+
// nix.Search will guarantee that the package exists for the current system.
307+
var infos map[string]*nix.Info
308+
var err error
309+
if p.IsDevboxPackage() && !p.IsRunX() {
310+
// Perf optimization: For queries of the form nixpkgs/<commit>#foo, we can
311+
// use a nix.Search cache.
312+
//
313+
// This will be slow if its the first time on the user's machine that this
314+
// query is running. Otherwise, it will be cached and fast.
315+
if infos, err = nix.SearchNixpkgsAttribute(query); err != nil {
316+
return "", err
317+
}
318+
} else {
319+
// fallback to the slow but generalized nix.Search
320+
if infos, err = nix.Search(query); err != nil {
321+
return "", err
322+
}
310323
}
311324

312325
if len(infos) == 1 {

internal/nix/nix.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ type PrintDevEnvArgs struct {
4444
// PrintDevEnv calls `nix print-dev-env -f <path>` and returns its output. The output contains
4545
// all the environment variables and bash functions required to create a nix shell.
4646
func (*Nix) PrintDevEnv(ctx context.Context, args *PrintDevEnvArgs) (*PrintDevEnvOut, error) {
47+
defer debug.FunctionTimer().End()
4748
defer trace.StartRegion(ctx, "nixPrintDevEnv").End()
4849

4950
var data []byte

internal/nix/search.go

Lines changed: 87 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@ import (
55
"fmt"
66
"os"
77
"os/exec"
8+
"regexp"
89
"strings"
10+
"time"
911

1012
"github.com/pkg/errors"
1113
"go.jetpack.io/devbox/internal/debug"
14+
"go.jetpack.io/devbox/internal/xdg"
15+
"go.jetpack.io/pkg/filecache"
1216
)
1317

1418
var (
@@ -19,10 +23,10 @@ var (
1923
type Info struct {
2024
// attribute key is different in flakes vs legacy so we should only use it
2125
// if we know exactly which version we are using
22-
AttributeKey string
23-
PName string
24-
Summary string
25-
Version string
26+
AttributeKey string `json:"attribute"`
27+
PName string `json:"pname"`
28+
Summary string `json:"summary"`
29+
Version string `json:"version"`
2630
}
2731

2832
func (i *Info) String() string {
@@ -31,10 +35,11 @@ func (i *Info) String() string {
3135

3236
func Search(url string) (map[string]*Info, error) {
3337
if strings.HasPrefix(url, "runx:") {
34-
// TODO implement runx search
38+
// TODO implement runx search. Also, move this check outside this function: nix package
39+
// should not be handling runx logic.
3540
return map[string]*Info{}, nil
3641
}
37-
return searchSystem(url, "")
42+
return searchSystem(url, "" /* system */)
3843
}
3944

4045
func parseSearchResults(data []byte) map[string]*Info {
@@ -106,3 +111,79 @@ func searchSystem(url, system string) (map[string]*Info, error) {
106111
}
107112
return parseSearchResults(out), nil
108113
}
114+
115+
// allowableQuery specifies the regex that queries for SearchNixpkgsAttribute must match.
116+
var allowableQuery = regexp.MustCompile("^github:NixOS/nixpkgs/[0-9a-f]{40}#[^#]+$")
117+
118+
// SearchNixpkgsAttribute is a wrapper around searchSystem that caches results.
119+
// NOTE: we should be very conservative in where we use this function. `nix search`
120+
// accepts generalized `installable regex` as arguments but is slow. For certain
121+
// queries of the form `nixpkgs/<commit-hash>#attribute`, we can know for sure that
122+
// once `nix search` returns a valid result, it will always be the very same result.
123+
// Hence we can cache it locally and answer future queries fast, by not calling `nix search`.
124+
func SearchNixpkgsAttribute(query string) (map[string]*Info, error) {
125+
if !allowableQuery.MatchString(query) {
126+
return nil, errors.Errorf("invalid query: %s, must match regex: %s", query, allowableQuery)
127+
}
128+
129+
key := cacheKey(query)
130+
131+
// Check if the query was already cached, and return the result if so
132+
cache := filecache.New("devbox/nix", filecache.WithCacheDir(xdg.CacheSubpath("")))
133+
if cachedResults, err := cache.Get(key); err == nil {
134+
var results map[string]*Info
135+
if err := json.Unmarshal(cachedResults, &results); err != nil {
136+
return nil, err
137+
}
138+
return results, nil
139+
} else if !filecacheIsCacheMiss(err) {
140+
return nil, err // genuine error
141+
}
142+
143+
// If not cached, or an update is needed, then call searchSystem
144+
infos, err := searchSystem(query, "" /*system*/)
145+
if err != nil {
146+
return nil, err
147+
}
148+
149+
// Save the results to the cache
150+
marshalled, err := json.Marshal(infos)
151+
if err != nil {
152+
return nil, err
153+
}
154+
// TODO savil: add a SetForever API that does not expire. Time based expiration is not needed here
155+
// because we're caching results that are guaranteed to be stable.
156+
// TODO savil: Make filecache.cache a public struct so it can be passed into other functions
157+
const oneYear = 12 * 30 * 24 * time.Hour
158+
if err := cache.Set(key, marshalled, oneYear); err != nil {
159+
return nil, err
160+
}
161+
162+
return infos, nil
163+
}
164+
165+
// read as: filecache.IsCacheMiss(err)
166+
// TODO savil: this should be implemented in the filecache package
167+
func filecacheIsCacheMiss(err error) bool {
168+
return errors.Is(err, filecache.NotFound) || errors.Is(err, filecache.Expired)
169+
}
170+
171+
// cacheKey sanitizes the search query to be a valid unix filename.
172+
// This cache key is used as the filename to store the cache value, and having a
173+
// representation of the query is important for debuggability.
174+
func cacheKey(query string) string {
175+
// Replace disallowed characters with underscores.
176+
re := regexp.MustCompile(`[:/#@+]`)
177+
sanitized := re.ReplaceAllString(query, "_")
178+
179+
// Remove any remaining invalid characters.
180+
sanitized = regexp.MustCompile(`[^\w\.-]`).ReplaceAllString(sanitized, "")
181+
182+
// Ensure the filename doesn't exceed the maximum length.
183+
const maxLen = 255
184+
if len(sanitized) > maxLen {
185+
sanitized = sanitized[:maxLen]
186+
}
187+
188+
return sanitized
189+
}

internal/nix/search_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package nix
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestSearchCacheKey(t *testing.T) {
8+
testCases := []struct {
9+
in string
10+
out string
11+
}{
12+
{
13+
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19",
14+
"github_NixOS_nixpkgs_8670e496ffd093b60e74e7fa53526aa5920d09eb_go_1_19",
15+
},
16+
{
17+
"github:nixos/nixpkgs/7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11#emacsPackages.@",
18+
"github_nixos_nixpkgs_7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11_emacsPackages._",
19+
},
20+
}
21+
22+
for _, testCase := range testCases {
23+
t.Run(testCase.out, func(t *testing.T) {
24+
out := cacheKey(testCase.in)
25+
if out != testCase.out {
26+
t.Errorf("got %s, want %s", out, testCase.out)
27+
}
28+
})
29+
}
30+
}
31+
32+
func TestAllowableQuery(t *testing.T) {
33+
testCases := []struct {
34+
in string
35+
expected bool
36+
}{
37+
{
38+
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19",
39+
true,
40+
},
41+
{
42+
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb",
43+
false,
44+
},
45+
{
46+
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#",
47+
false,
48+
},
49+
{
50+
"github:NixOS/nixpkgs/nixpkgs-unstable#go_1_19",
51+
false,
52+
},
53+
}
54+
for _, testCase := range testCases {
55+
t.Run(testCase.in, func(t *testing.T) {
56+
out := allowableQuery.MatchString(testCase.in)
57+
if out != testCase.expected {
58+
t.Errorf("got %t, want %t", out, testCase.expected)
59+
}
60+
})
61+
}
62+
}

0 commit comments

Comments
 (0)