Skip to content

Refactor commit reader (#34542) #34549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/git/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type Commit struct {
// CommitSignature represents a git commit signature part.
type CommitSignature struct {
Signature string
Payload string // TODO check if can be reconstruct from the rest of commit information to not have duplicate data
Payload string
}

// Message returns the commit message. Same as retrieving CommitMessage directly.
Expand Down
132 changes: 61 additions & 71 deletions modules/git/commit_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,44 @@ package git
import (
"bufio"
"bytes"
"fmt"
"io"
"strings"
)

const (
commitHeaderGpgsig = "gpgsig"
commitHeaderGpgsigSha256 = "gpgsig-sha256"
)

func assignCommitFields(gitRepo *Repository, commit *Commit, headerKey string, headerValue []byte) error {
if len(headerValue) > 0 && headerValue[len(headerValue)-1] == '\n' {
headerValue = headerValue[:len(headerValue)-1] // remove trailing newline
}
switch headerKey {
case "tree":
objID, err := NewIDFromString(string(headerValue))
if err != nil {
return fmt.Errorf("invalid tree ID %q: %w", string(headerValue), err)
}
commit.Tree = *NewTree(gitRepo, objID)
case "parent":
objID, err := NewIDFromString(string(headerValue))
if err != nil {
return fmt.Errorf("invalid parent ID %q: %w", string(headerValue), err)
}
commit.Parents = append(commit.Parents, objID)
case "author":
commit.Author.Decode(headerValue)
case "committer":
commit.Committer.Decode(headerValue)
case commitHeaderGpgsig, commitHeaderGpgsigSha256:
// if there are duplicate "gpgsig" and "gpgsig-sha256" headers, then the signature must have already been invalid
// so we don't need to handle duplicate headers here
commit.Signature = &CommitSignature{Signature: string(headerValue)}
}
return nil
}

// CommitFromReader will generate a Commit from a provided reader
// We need this to interpret commits from cat-file or cat-file --batch
//
Expand All @@ -21,90 +55,46 @@ func CommitFromReader(gitRepo *Repository, objectID ObjectID, reader io.Reader)
Committer: &Signature{},
}

payloadSB := new(strings.Builder)
signatureSB := new(strings.Builder)
messageSB := new(strings.Builder)
message := false
pgpsig := false

bufReader, ok := reader.(*bufio.Reader)
if !ok {
bufReader = bufio.NewReader(reader)
}

readLoop:
bufReader := bufio.NewReader(reader)
inHeader := true
var payloadSB, messageSB bytes.Buffer
var headerKey string
var headerValue []byte
for {
line, err := bufReader.ReadBytes('\n')
if err != nil {
if err == io.EOF {
if message {
_, _ = messageSB.Write(line)
}
_, _ = payloadSB.Write(line)
break readLoop
}
return nil, err
if err != nil && err != io.EOF {
return nil, fmt.Errorf("unable to read commit %q: %w", objectID.String(), err)
}
if pgpsig {
if len(line) > 0 && line[0] == ' ' {
_, _ = signatureSB.Write(line[1:])
continue
}
pgpsig = false
if len(line) == 0 {
break
}

if !message {
// This is probably not correct but is copied from go-gits interpretation...
trimmed := bytes.TrimSpace(line)
if len(trimmed) == 0 {
message = true
_, _ = payloadSB.Write(line)
continue
}

split := bytes.SplitN(trimmed, []byte{' '}, 2)
var data []byte
if len(split) > 1 {
data = split[1]
if inHeader {
inHeader = !(len(line) == 1 && line[0] == '\n') // still in header if line is not just a newline
k, v, _ := bytes.Cut(line, []byte{' '})
if len(k) != 0 || !inHeader {
if headerKey != "" {
if err = assignCommitFields(gitRepo, commit, headerKey, headerValue); err != nil {
return nil, fmt.Errorf("unable to parse commit %q: %w", objectID.String(), err)
}
}
headerKey = string(k) // it also resets the headerValue to empty string if not inHeader
headerValue = v
} else {
headerValue = append(headerValue, v...)
}

switch string(split[0]) {
case "tree":
commit.Tree = *NewTree(gitRepo, MustIDFromString(string(data)))
if headerKey != commitHeaderGpgsig && headerKey != commitHeaderGpgsigSha256 {
_, _ = payloadSB.Write(line)
case "parent":
commit.Parents = append(commit.Parents, MustIDFromString(string(data)))
_, _ = payloadSB.Write(line)
case "author":
commit.Author = &Signature{}
commit.Author.Decode(data)
_, _ = payloadSB.Write(line)
case "committer":
commit.Committer = &Signature{}
commit.Committer.Decode(data)
_, _ = payloadSB.Write(line)
case "encoding":
_, _ = payloadSB.Write(line)
case "gpgsig":
fallthrough
case "gpgsig-sha256": // FIXME: no intertop, so only 1 exists at present.
_, _ = signatureSB.Write(data)
_ = signatureSB.WriteByte('\n')
pgpsig = true
}
} else {
_, _ = messageSB.Write(line)
_, _ = payloadSB.Write(line)
}
}

commit.CommitMessage = messageSB.String()
commit.Signature = &CommitSignature{
Signature: signatureSB.String(),
Payload: payloadSB.String(),
}
if len(commit.Signature.Signature) == 0 {
commit.Signature = nil
if commit.Signature != nil {
commit.Signature.Payload = payloadSB.String()
}

return commit, nil
}
6 changes: 2 additions & 4 deletions modules/git/commit_sha256_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ func TestGetFullCommitIDErrorSha256(t *testing.T) {
}

func TestCommitFromReaderSha256(t *testing.T) {
commitString := `9433b2a62b964c17a4485ae180f45f595d3e69d31b786087775e28c6b6399df0 commit 1114
tree e7f9e96dd79c09b078cac8b303a7d3b9d65ff9b734e86060a4d20409fd379f9e
commitString := `tree e7f9e96dd79c09b078cac8b303a7d3b9d65ff9b734e86060a4d20409fd379f9e
parent 26e9ccc29fad747e9c5d9f4c9ddeb7eff61cc45ef6a8dc258cbeb181afc055e8
author Adam Majer <[email protected]> 1698676906 +0100
committer Adam Majer <[email protected]> 1698676906 +0100
Expand Down Expand Up @@ -112,8 +111,7 @@ VAEUo6ecdDxSpyt2naeg9pKus/BRi7P6g4B1hkk/zZstUX/QP4IQuAJbXjkvsC+X
HKRr3NlRM/DygzTyj0gN74uoa0goCIbyAQhiT42nm0cuhM7uN/W0ayrlZjGF1cbR
8NCJUL2Nwj0ywKIavC99Ipkb8AsFwpVT6U6effs6
=xybZ
-----END PGP SIGNATURE-----
`, commitFromReader.Signature.Signature)
-----END PGP SIGNATURE-----`, commitFromReader.Signature.Signature)
assert.Equal(t, `tree e7f9e96dd79c09b078cac8b303a7d3b9d65ff9b734e86060a4d20409fd379f9e
parent 26e9ccc29fad747e9c5d9f4c9ddeb7eff61cc45ef6a8dc258cbeb181afc055e8
author Adam Majer <[email protected]> 1698676906 +0100
Expand Down
12 changes: 4 additions & 8 deletions modules/git/commit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ func TestGetFullCommitIDError(t *testing.T) {
}

func TestCommitFromReader(t *testing.T) {
commitString := `feaf4ba6bc635fec442f46ddd4512416ec43c2c2 commit 1074
tree f1a6cb52b2d16773290cefe49ad0684b50a4f930
commitString := `tree f1a6cb52b2d16773290cefe49ad0684b50a4f930
parent 37991dec2c8e592043f47155ce4808d4580f9123
author silverwind <[email protected]> 1563741793 +0200
committer silverwind <[email protected]> 1563741793 +0200
Expand Down Expand Up @@ -108,8 +107,7 @@ sD53z/f0J+We4VZjY+pidvA9BGZPFVdR3wd3xGs8/oH6UWaLJAMGkLG6dDb3qDLm
mfeFhT57UbE4qukTDIQ0Y0WM40UYRTakRaDY7ubhXgLgx09Cnp9XTVMsHgT6j9/i
1pxsB104XLWjQHTjr1JtiaBQEwFh9r2OKTcpvaLcbNtYpo7CzOs=
=FRsO
-----END PGP SIGNATURE-----
`, commitFromReader.Signature.Signature)
-----END PGP SIGNATURE-----`, commitFromReader.Signature.Signature)
assert.Equal(t, `tree f1a6cb52b2d16773290cefe49ad0684b50a4f930
parent 37991dec2c8e592043f47155ce4808d4580f9123
author silverwind <[email protected]> 1563741793 +0200
Expand All @@ -126,8 +124,7 @@ empty commit`, commitFromReader.Signature.Payload)
}

func TestCommitWithEncodingFromReader(t *testing.T) {
commitString := `feaf4ba6bc635fec442f46ddd4512416ec43c2c2 commit 1074
tree ca3fad42080dd1a6d291b75acdfc46e5b9b307e5
commitString := `tree ca3fad42080dd1a6d291b75acdfc46e5b9b307e5
parent 47b24e7ab977ed31c5a39989d570847d6d0052af
author KN4CK3R <[email protected]> 1711702962 +0100
committer KN4CK3R <[email protected]> 1711702962 +0100
Expand Down Expand Up @@ -172,8 +169,7 @@ SONRzusmu5n3DgV956REL7x62h7JuqmBz/12HZkr0z0zgXkcZ04q08pSJATX5N1F
yN+tWxTsWg+zhDk96d5Esdo9JMjcFvPv0eioo30GAERaz1hoD7zCMT4jgUFTQwgz
jw4YcO5u
=r3UU
-----END PGP SIGNATURE-----
`, commitFromReader.Signature.Signature)
-----END PGP SIGNATURE-----`, commitFromReader.Signature.Signature)
assert.Equal(t, `tree ca3fad42080dd1a6d291b75acdfc46e5b9b307e5
parent 47b24e7ab977ed31c5a39989d570847d6d0052af
author KN4CK3R <[email protected]> 1711702962 +0100
Expand Down