Skip to content

Commit cc24739

Browse files
gcurtismikeland73
andauthored
internal/lock: canonicalize JSON before hashing cache keys (#1648)
Update the cache key hashing functions so they canonicalize JSON before hashing. This avoids busting the devbox environment cache whenever whitespace, comments, or field order changes in devbox.json. It uses the [experimental JSON v2][1] package to perform the canonicalization. Because these functions are only used for caching and must be computed quickly at startup, the hashing algorithm is changed to fnv whereas before it was a mix of md5 and sha256. The hashing functions are also now in a separate cachehash package since they're used for more than just hashing the config. [1]: golang/go#63397 --------- Co-authored-by: Mike Landau <[email protected]>
1 parent aff32e1 commit cc24739

File tree

15 files changed

+231
-82
lines changed

15 files changed

+231
-82
lines changed

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,6 @@ github.com/yuin/gopher-lua v0.0.0-20190514113301-1cd887cd7036/go.mod h1:gqRgreBU
339339
github.com/zaffka/mongodb-boltdb-mock v0.0.0-20221014194232-b4bb03fbe3a0/go.mod h1:GsDD1qsG+86MeeCG7ndi6Ei3iGthKL3wQ7PTFigDfNY=
340340
github.com/zealic/go2node v0.1.0 h1:ofxpve08cmLJBwFdI0lPCk9jfwGWOSD+s6216x0oAaA=
341341
github.com/zealic/go2node v0.1.0/go.mod h1:GrkFr+HctXwP7vzcU9RsgtAeJjTQ6Ud0IPCQAqpTfBg=
342-
go.jetpack.io/pkg v0.0.0-20231121212119-81d286a4b5f1 h1:ey+uP0oDBTuxemD8aA6XKACbU9H1ZxYyD92kd+EGVgU=
343-
go.jetpack.io/pkg v0.0.0-20231121212119-81d286a4b5f1/go.mod h1:2X/fgSoPjX2GKEiZ50sW5jJp5oMt5cUDs+1PYzKb+60=
344342
go.jetpack.io/pkg v0.0.0-20231121223835-1a68fb347275 h1:oow+Xe2zqql4z4nvLDYJWkJ/MeMbJ1y4+JiKCchcQbE=
345343
go.jetpack.io/pkg v0.0.0-20231121223835-1a68fb347275/go.mod h1:2X/fgSoPjX2GKEiZ50sW5jJp5oMt5cUDs+1PYzKb+60=
346344
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=

internal/cachehash/hash.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Package cachehash generates non-cryptographic cache keys.
2+
//
3+
// The functions in this package make no guarantees about the underlying hashing
4+
// algorithm. It should only be used for caching, where it's ok if the hash for
5+
// a given input changes.
6+
package cachehash
7+
8+
import (
9+
"bytes"
10+
"crypto/sha256"
11+
"encoding/hex"
12+
"encoding/json"
13+
"errors"
14+
"hash"
15+
"io"
16+
"os"
17+
18+
"go.jetpack.io/devbox/internal/redact"
19+
)
20+
21+
// Bytes returns a hex-encoded hash of b.
22+
func Bytes(b []byte) (string, error) {
23+
h := newHash()
24+
h.Write(b)
25+
return hex.EncodeToString(h.Sum(nil)), nil
26+
}
27+
28+
// File returns a hex-encoded hash of a file's contents.
29+
func File(path string) (string, error) {
30+
f, err := os.Open(path)
31+
if errors.Is(err, os.ErrNotExist) {
32+
return "", nil
33+
}
34+
if err != nil {
35+
return "", err
36+
}
37+
defer f.Close()
38+
39+
h := newHash()
40+
if _, err := io.Copy(h, f); err != nil {
41+
return "", err
42+
}
43+
return hex.EncodeToString(h.Sum(nil)), nil
44+
}
45+
46+
// JSON marshals a to JSON and returns its hex-encoded hash.
47+
func JSON(a any) (string, error) {
48+
b, err := json.Marshal(a)
49+
if err != nil {
50+
return "", redact.Errorf("marshal to json for hashing: %v", err)
51+
}
52+
return Bytes(b)
53+
}
54+
55+
// JSONFile compacts the JSON in a file and returns its hex-encoded hash.
56+
func JSONFile(path string) (string, error) {
57+
b, err := os.ReadFile(path)
58+
if errors.Is(err, os.ErrNotExist) {
59+
return "", nil
60+
}
61+
if err != nil {
62+
return "", err
63+
}
64+
buf := &bytes.Buffer{}
65+
if err := json.Compact(buf, b); err != nil {
66+
return "", redact.Errorf("compact json for hashing: %v", err)
67+
}
68+
return Bytes(buf.Bytes())
69+
}
70+
71+
func newHash() hash.Hash { return sha256.New() }

internal/cachehash/hash_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
//nolint:varnamelen
2+
package cachehash
3+
4+
import (
5+
"os"
6+
"path/filepath"
7+
"testing"
8+
)
9+
10+
func TestFile(t *testing.T) {
11+
dir := t.TempDir()
12+
13+
ab := filepath.Join(dir, "ab.json")
14+
err := os.WriteFile(ab, []byte(`{"a":"\n","b":"\u000A"}`), 0o644)
15+
if err != nil {
16+
t.Fatal(err)
17+
}
18+
ba := filepath.Join(dir, "ba.json")
19+
err = os.WriteFile(ba, []byte(`{"b":"\n","a":"\u000A"}`), 0o644)
20+
if err != nil {
21+
t.Fatal(err)
22+
}
23+
24+
abHash, err := File(ab)
25+
if err != nil {
26+
t.Errorf("got File(ab) error: %v", err)
27+
}
28+
baHash, err := File(ba)
29+
if err != nil {
30+
t.Errorf("got File(ba) error: %v", err)
31+
}
32+
if abHash == baHash {
33+
t.Errorf("got (File(%q) = %q) == (File(%q) = %q), want different hashes", ab, abHash, ba, baHash)
34+
}
35+
}
36+
37+
func TestFileNotExist(t *testing.T) {
38+
t.TempDir()
39+
hash, err := File(t.TempDir() + "/notafile")
40+
if err != nil {
41+
t.Errorf("got error: %v", err)
42+
}
43+
if hash != "" {
44+
t.Errorf("got non-empty hash %q", hash)
45+
}
46+
}
47+
48+
func TestJSON(t *testing.T) {
49+
a := struct{ A, B string }{"a", "b"}
50+
aHash, err := JSON(a)
51+
if err != nil {
52+
t.Errorf("got JSON(%#q) error: %v", a, err)
53+
}
54+
if aHash == "" {
55+
t.Errorf(`got JSON(%#q) == ""`, a)
56+
}
57+
58+
b := map[string]string{"A": "a", "B": "b"}
59+
bHash, err := JSON(b)
60+
if err != nil {
61+
t.Errorf("got JSON(%#q) error: %v", b, err)
62+
}
63+
if bHash == "" {
64+
t.Errorf(`got JSON(%#q) == ""`, b)
65+
}
66+
if aHash != bHash {
67+
t.Errorf("got (JSON(%#q) = %q) != (JSON(%#q) = %q), want equal hashes", a, aHash, b, bHash)
68+
}
69+
}
70+
71+
func TestJSONUnsupportedType(t *testing.T) {
72+
j := struct{ C chan int }{}
73+
_, err := JSON(j)
74+
if err == nil {
75+
t.Error("got nil error for struct with channel field")
76+
}
77+
}
78+
79+
func TestJSONFile(t *testing.T) {
80+
dir := t.TempDir()
81+
82+
compact := filepath.Join(dir, "compact.json")
83+
err := os.WriteFile(compact, []byte(`{"key":"value"}`), 0o644)
84+
if err != nil {
85+
t.Fatal(err)
86+
}
87+
space := filepath.Join(dir, "space.json")
88+
err = os.WriteFile(space, []byte(`{ "key": "value" }`), 0o644)
89+
if err != nil {
90+
t.Fatal(err)
91+
}
92+
93+
compactHash, err := JSONFile(compact)
94+
if err != nil {
95+
t.Errorf("got JSONFile(ab) error: %v", err)
96+
}
97+
spaceHash, err := JSONFile(space)
98+
if err != nil {
99+
t.Errorf("got JSONFile(ba) error: %v", err)
100+
}
101+
if compactHash != spaceHash {
102+
t.Errorf("got (JSONFile(%q) = %q) != (JSONFile(%q) = %q), want equal hashes", compact, compactHash, space, spaceHash)
103+
}
104+
}
105+
106+
func TestJSONFileNotExist(t *testing.T) {
107+
t.TempDir()
108+
hash, err := JSONFile(t.TempDir() + "/notafile")
109+
if err != nil {
110+
t.Errorf("got error: %v", err)
111+
}
112+
if hash != "" {
113+
t.Errorf("got non-empty hash %q", hash)
114+
}
115+
}

internal/cuecfg/hash.go

Lines changed: 0 additions & 33 deletions
This file was deleted.

internal/devconfig/config.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import (
1818
"github.com/tailscale/hujson"
1919
"go.jetpack.io/devbox/internal/boxcli/featureflag"
2020
"go.jetpack.io/devbox/internal/boxcli/usererr"
21-
"go.jetpack.io/devbox/internal/cuecfg"
21+
"go.jetpack.io/devbox/internal/cachehash"
2222
"go.jetpack.io/devbox/internal/impl/shellcmd"
2323
)
2424

@@ -103,7 +103,9 @@ func (c *Config) Bytes() []byte {
103103
}
104104

105105
func (c *Config) Hash() (string, error) {
106-
return cuecfg.Hash(c)
106+
ast := c.ast.root.Clone()
107+
ast.Minimize()
108+
return cachehash.Bytes(ast.Pack())
107109
}
108110

109111
func (c *Config) Equals(other *Config) bool {

internal/devpkg/package.go

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ package devpkg
55

66
import (
77
"context"
8-
"crypto/md5"
9-
"encoding/hex"
108
"fmt"
119
"io"
1210
"path/filepath"
@@ -17,7 +15,7 @@ import (
1715
"github.com/pkg/errors"
1816
"github.com/samber/lo"
1917
"go.jetpack.io/devbox/internal/boxcli/usererr"
20-
"go.jetpack.io/devbox/internal/cuecfg"
18+
"go.jetpack.io/devbox/internal/cachehash"
2119
"go.jetpack.io/devbox/internal/devconfig"
2220
"go.jetpack.io/devbox/internal/devpkg/pkgtype"
2321
"go.jetpack.io/devbox/internal/impl/devopt"
@@ -428,21 +426,17 @@ func (p *Package) normalizePackageAttributePath() (string, error) {
428426
var ErrCannotBuildPackageOnSystem = errors.New("unable to build for system")
429427

430428
func (p *Package) Hash() string {
431-
hash := ""
432-
if p.installable != (FlakeInstallable{}) {
433-
sum := md5.Sum([]byte(p.installable.String()))
434-
hash = hex.EncodeToString(sum[:])
435-
} else if p.installable.Ref.Type == FlakeTypePath {
429+
sum := ""
430+
if p.installable.Ref.Type == FlakeTypePath {
436431
// For local flakes, use content hash of the flake.nix file to ensure
437432
// user always gets newest flake.
438-
hash, _ = cuecfg.FileHash(filepath.Join(p.installable.Ref.Path, "flake.nix"))
433+
sum, _ = cachehash.File(filepath.Join(p.installable.Ref.Path, "flake.nix"))
439434
}
440435

441-
if hash == "" {
442-
sum := md5.Sum([]byte(p.installable.String()))
443-
hash = hex.EncodeToString(sum[:])
436+
if sum == "" {
437+
sum, _ = cachehash.Bytes([]byte(p.installable.String()))
444438
}
445-
return hash[:min(len(hash), 6)]
439+
return sum[:min(len(sum), 6)]
446440
}
447441

448442
// Equals compares two Packages. This may be an expensive operation since it

internal/devpkg/package_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,28 @@ func TestInput(t *testing.T) {
3030
{
3131
pkg: "path:path/to/my-flake#my-package",
3232
isFlake: true,
33-
name: "my-flake-773986",
33+
name: "my-flake-9a897d",
3434
urlWithoutFragment: "path:" + filepath.Join(projectDir, "path/to/my-flake"),
3535
urlForInput: "path:" + filepath.Join(projectDir, "path/to/my-flake"),
3636
},
3737
{
3838
pkg: "path:.#my-package",
3939
isFlake: true,
40-
name: "my-project-20698c",
40+
name: "my-project-45b022",
4141
urlWithoutFragment: "path:" + projectDir,
4242
urlForInput: "path:" + projectDir,
4343
},
4444
{
4545
pkg: "path:/tmp/my-project/path/to/my-flake#my-package",
4646
isFlake: true,
47-
name: "my-flake-773986",
47+
name: "my-flake-9a897d",
4848
urlWithoutFragment: "path:" + filepath.Join(projectDir, "path/to/my-flake"),
4949
urlForInput: "path:" + filepath.Join(projectDir, "path/to/my-flake"),
5050
},
5151
{
5252
pkg: "path:/tmp/my-project/path/to/my-flake",
5353
isFlake: true,
54-
name: "my-flake-eaedce",
54+
name: "my-flake-7d03be",
5555
urlWithoutFragment: "path:" + filepath.Join(projectDir, "path/to/my-flake"),
5656
urlForInput: "path:" + filepath.Join(projectDir, "path/to/my-flake"),
5757
},

internal/impl/devbox.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package impl
66

77
import (
8+
"bytes"
89
"context"
910
"fmt"
1011
"io"
@@ -22,6 +23,7 @@ import (
2223
"github.com/briandowns/spinner"
2324
"github.com/pkg/errors"
2425
"github.com/samber/lo"
26+
"go.jetpack.io/devbox/internal/cachehash"
2527
"go.jetpack.io/devbox/internal/devpkg"
2628
"go.jetpack.io/devbox/internal/devpkg/pkgtype"
2729
"go.jetpack.io/devbox/internal/impl/envpath"
@@ -33,7 +35,6 @@ import (
3335
"go.jetpack.io/devbox/internal/boxcli/usererr"
3436
"go.jetpack.io/devbox/internal/cmdutil"
3537
"go.jetpack.io/devbox/internal/conf"
36-
"go.jetpack.io/devbox/internal/cuecfg"
3738
"go.jetpack.io/devbox/internal/debug"
3839
"go.jetpack.io/devbox/internal/devconfig"
3940
"go.jetpack.io/devbox/internal/envir"
@@ -138,15 +139,20 @@ func (d *Devbox) Config() *devconfig.Config {
138139
}
139140

140141
func (d *Devbox) ConfigHash() (string, error) {
141-
pkgHashes := lo.Map(d.configPackages(), func(i *devpkg.Package, _ int) string { return i.Hash() })
142-
includeHashes := lo.Map(d.Includes(), func(i plugin.Includable, _ int) string { return i.Hash() })
143142
h, err := d.cfg.Hash()
144143
if err != nil {
145144
return "", err
146145
}
147-
return cuecfg.Hash(
148-
h + strings.Join(pkgHashes, "") + strings.Join(includeHashes, ""),
149-
)
146+
147+
buf := bytes.Buffer{}
148+
buf.WriteString(h)
149+
for _, pkg := range d.configPackages() {
150+
buf.WriteString(pkg.Hash())
151+
}
152+
for _, inc := range d.Includes() {
153+
buf.WriteString(inc.Hash())
154+
}
155+
return cachehash.Bytes(buf.Bytes())
150156
}
151157

152158
func (d *Devbox) NixPkgsCommitHash() string {
@@ -1145,12 +1151,12 @@ func (d *Devbox) setCommonHelperEnvVars(env map[string]string) {
11451151
}
11461152

11471153
func (d *Devbox) projectDirHash() string {
1148-
hash, _ := cuecfg.Hash(d.projectDir)
1149-
return hash
1154+
h, _ := cachehash.Bytes([]byte(d.projectDir))
1155+
return h
11501156
}
11511157

11521158
func (d *Devbox) addHashToEnv(env map[string]string) error {
1153-
hash, err := cuecfg.Hash(env)
1159+
hash, err := cachehash.JSON(env)
11541160
if err == nil {
11551161
env[d.shellEnvHashKey()] = hash
11561162
}

0 commit comments

Comments
 (0)