Skip to content

[perf] cache nix.searchSystem #1546

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ require (
github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a
github.com/wk8/go-ordered-map/v2 v2.1.8
github.com/zealic/go2node v0.1.0
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17
golang.org/x/mod v0.12.0
golang.org/x/sync v0.3.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@ github.com/yuin/gopher-lua v0.0.0-20190514113301-1cd887cd7036/go.mod h1:gqRgreBU
github.com/zaffka/mongodb-boltdb-mock v0.0.0-20221014194232-b4bb03fbe3a0/go.mod h1:GsDD1qsG+86MeeCG7ndi6Ei3iGthKL3wQ7PTFigDfNY=
github.com/zealic/go2node v0.1.0 h1:ofxpve08cmLJBwFdI0lPCk9jfwGWOSD+s6216x0oAaA=
github.com/zealic/go2node v0.1.0/go.mod h1:GrkFr+HctXwP7vzcU9RsgtAeJjTQ6Ud0IPCQAqpTfBg=
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e h1:GtqFrE5uUf6rXFk3zaIqIfLfykrqrB8gp6bDmOfH+2s=
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e/go.mod h1:m49CAcLbpZttEbzoEWC1SKD+/z5mEiFdw0dQxY6AM5Q=
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8 h1:xzAAUVfg9uqyJhZVc+ZDTLHtFlMhj/5St5JnVfDxbpk=
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8/go.mod h1:m49CAcLbpZttEbzoEWC1SKD+/z5mEiFdw0dQxY6AM5Q=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
Expand Down
25 changes: 19 additions & 6 deletions internal/devpkg/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ func (p *Package) NormalizedPackageAttributePath() (string, error) {
}

// normalizePackageAttributePath calls nix search to find the normalized attribute
// path. It is an expensive call (~100ms).
// path. It may be an expensive call (~100ms).
func (p *Package) normalizePackageAttributePath() (string, error) {
var query string
if p.IsDevboxPackage() {
Expand All @@ -302,11 +302,24 @@ func (p *Package) normalizePackageAttributePath() (string, error) {
query = p.String()
}

// We prefer search over just trying to parse the URL because search will
// guarantee that the package exists for the current system.
infos, err := nix.Search(query)
if err != nil {
return "", err
// We prefer nix.Search over just trying to parse the package's "URL" because
// nix.Search will guarantee that the package exists for the current system.
var infos map[string]*nix.Info
var err error
if p.IsDevboxPackage() && !p.IsRunX() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! this feels really understandable to me.

// Perf optimization: For queries of the form nixpkgs/<commit>#foo, we can
// use a nix.Search cache.
//
// This will be slow if its the first time on the user's machine that this
// query is running. Otherwise, it will be cached and fast.
if infos, err = nix.SearchNixpkgsAttribute(query); err != nil {
return "", err
}
} else {
// fallback to the slow but generalized nix.Search
if infos, err = nix.Search(query); err != nil {
return "", err
}
}

if len(infos) == 1 {
Expand Down
1 change: 1 addition & 0 deletions internal/nix/nix.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type PrintDevEnvArgs struct {
// PrintDevEnv calls `nix print-dev-env -f <path>` and returns its output. The output contains
// all the environment variables and bash functions required to create a nix shell.
func (*Nix) PrintDevEnv(ctx context.Context, args *PrintDevEnvArgs) (*PrintDevEnvOut, error) {
defer debug.FunctionTimer().End()
defer trace.StartRegion(ctx, "nixPrintDevEnv").End()

var data []byte
Expand Down
93 changes: 87 additions & 6 deletions internal/nix/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ import (
"fmt"
"os"
"os/exec"
"regexp"
"strings"
"time"

"github.com/pkg/errors"
"go.jetpack.io/devbox/internal/debug"
"go.jetpack.io/devbox/internal/xdg"
"go.jetpack.io/pkg/filecache"
)

var (
Expand All @@ -19,10 +23,10 @@ var (
type Info struct {
// attribute key is different in flakes vs legacy so we should only use it
// if we know exactly which version we are using
AttributeKey string
PName string
Summary string
Version string
AttributeKey string `json:"attribute"`
PName string `json:"pname"`
Summary string `json:"summary"`
Version string `json:"version"`
}

func (i *Info) String() string {
Expand All @@ -31,10 +35,11 @@ func (i *Info) String() string {

func Search(url string) (map[string]*Info, error) {
if strings.HasPrefix(url, "runx:") {
// TODO implement runx search
// TODO implement runx search. Also, move this check outside this function: nix package
// should not be handling runx logic.
return map[string]*Info{}, nil
}
return searchSystem(url, "")
return searchSystem(url, "" /* system */)
}

func parseSearchResults(data []byte) map[string]*Info {
Expand Down Expand Up @@ -106,3 +111,79 @@ func searchSystem(url, system string) (map[string]*Info, error) {
}
return parseSearchResults(out), nil
}

// allowableQuery specifies the regex that queries for SearchNixpkgsAttribute must match.
var allowableQuery = regexp.MustCompile("^github:NixOS/nixpkgs/[0-9a-f]{40}#[^#]+$")

// SearchNixpkgsAttribute is a wrapper around searchSystem that caches results.
// NOTE: we should be very conservative in where we use this function. `nix search`
// accepts generalized `installable regex` as arguments but is slow. For certain
// queries of the form `nixpkgs/<commit-hash>#attribute`, we can know for sure that
// once `nix search` returns a valid result, it will always be the very same result.
// Hence we can cache it locally and answer future queries fast, by not calling `nix search`.
func SearchNixpkgsAttribute(query string) (map[string]*Info, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just CachedSearch

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the reasons explained in the comment.

I wanted to keep it specific to search queries I have high confidence should always resolve to the very same result. The nix search api accepts installable regex queries which this doesn't. Hence I chose a very specific name to indicate what scenarios this works with.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see my comment above about flake-reference queries not being suitable to be cached.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we throw in a regex to verify that the input query is ^github:NixOS/nixpkgs/[0-9a-f]{40}#? Just to make sure we don't end up caching a ref like github:NixOS/nixpkgs/nixpkgs-unstable#go_1_19.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good call, done

if !allowableQuery.MatchString(query) {
return nil, errors.Errorf("invalid query: %s, must match regex: %s", query, allowableQuery)
}

key := cacheKey(query)

// Check if the query was already cached, and return the result if so
cache := filecache.New("devbox/nix", filecache.WithCacheDir(xdg.CacheSubpath("")))
if cachedResults, err := cache.Get(key); err == nil {
var results map[string]*Info
if err := json.Unmarshal(cachedResults, &results); err != nil {
return nil, err
}
return results, nil
} else if !filecacheIsCacheMiss(err) {
return nil, err // genuine error
}

// If not cached, or an update is needed, then call searchSystem
infos, err := searchSystem(query, "" /*system*/)
if err != nil {
return nil, err
}

// Save the results to the cache
marshalled, err := json.Marshal(infos)
if err != nil {
return nil, err
}
// TODO savil: add a SetForever API that does not expire. Time based expiration is not needed here
// because we're caching results that are guaranteed to be stable.
// TODO savil: Make filecache.cache a public struct so it can be passed into other functions
const oneYear = 12 * 30 * 24 * time.Hour
if err := cache.Set(key, marshalled, oneYear); err != nil {
return nil, err
}

return infos, nil
}

// read as: filecache.IsCacheMiss(err)
// TODO savil: this should be implemented in the filecache package
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree!

func filecacheIsCacheMiss(err error) bool {
return errors.Is(err, filecache.NotFound) || errors.Is(err, filecache.Expired)
}

// cacheKey sanitizes the search query to be a valid unix filename.
// This cache key is used as the filename to store the cache value, and having a
// representation of the query is important for debuggability.
func cacheKey(query string) string {
// Replace disallowed characters with underscores.
re := regexp.MustCompile(`[:/#@+]`)
sanitized := re.ReplaceAllString(query, "_")

// Remove any remaining invalid characters.
sanitized = regexp.MustCompile(`[^\w\.-]`).ReplaceAllString(sanitized, "")

// Ensure the filename doesn't exceed the maximum length.
const maxLen = 255
if len(sanitized) > maxLen {
sanitized = sanitized[:maxLen]
}

return sanitized
}
62 changes: 62 additions & 0 deletions internal/nix/search_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package nix

import (
"testing"
)

func TestSearchCacheKey(t *testing.T) {
testCases := []struct {
in string
out string
}{
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19",
"github_NixOS_nixpkgs_8670e496ffd093b60e74e7fa53526aa5920d09eb_go_1_19",
},
{
"github:nixos/nixpkgs/7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11#emacsPackages.@",
"github_nixos_nixpkgs_7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11_emacsPackages._",
},
}

for _, testCase := range testCases {
t.Run(testCase.out, func(t *testing.T) {
out := cacheKey(testCase.in)
if out != testCase.out {
t.Errorf("got %s, want %s", out, testCase.out)
}
})
}
}

func TestAllowableQuery(t *testing.T) {
testCases := []struct {
in string
expected bool
}{
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19",
true,
},
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb",
false,
},
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#",
false,
},
{
"github:NixOS/nixpkgs/nixpkgs-unstable#go_1_19",
false,
},
}
for _, testCase := range testCases {
t.Run(testCase.in, func(t *testing.T) {
out := allowableQuery.MatchString(testCase.in)
if out != testCase.expected {
t.Errorf("got %t, want %t", out, testCase.expected)
}
})
}
}