Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit 80fd112

Browse files
committed
gitserver: Fetch in GitBackend
This PR moves Fetch to the GitBackend to benefit from all the observability and so forth that this package provides. Also, it's a git command. All git commands should live here eventually. Test plan: E2E tests verify that fetching still works.
1 parent d8f15c6 commit 80fd112

File tree

13 files changed

+490
-72
lines changed

13 files changed

+490
-72
lines changed

cmd/gitserver/internal/git/gitcli/command.go

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"fmt"
77
"io"
8+
"os"
89
"os/exec"
910
"runtime"
1011
"strconv"
@@ -47,8 +48,11 @@ var (
4748

4849
type commandOpts struct {
4950
arguments []string
51+
addtlEnv []string
52+
redactor func(string) string
5053

51-
stdin io.Reader
54+
stdin io.Reader
55+
stderr io.Writer
5256
}
5357

5458
func optsFromFuncs(optFns ...CommandOptionFunc) commandOpts {
@@ -68,13 +72,38 @@ func WithArguments(args ...string) CommandOptionFunc {
6872
}
6973
}
7074

75+
// WithEnv sets the given environment variables on the command. os.Environ()
76+
// is always passed, the additional env vars passed here will be appended.
77+
func WithEnv(env ...string) CommandOptionFunc {
78+
return func(o *commandOpts) {
79+
o.addtlEnv = append(o.addtlEnv, env...)
80+
}
81+
}
82+
7183
// WithStdin specifies the reader to use for the command's stdin input.
7284
func WithStdin(stdin io.Reader) CommandOptionFunc {
7385
return func(o *commandOpts) {
7486
o.stdin = stdin
7587
}
7688
}
7789

90+
// WithStderr sets the stderr writer for the command. When set, stderr will be
91+
// written to the passed writer, and also tracked internally for corruption detection
92+
// and error reporting using a io.MultiWriter.
93+
func WithStderr(stderr io.Writer) CommandOptionFunc {
94+
return func(o *commandOpts) {
95+
o.stderr = stderr
96+
}
97+
}
98+
99+
// WithOutputRedactor sets the command recorder redactor function to use for the command,
100+
// and best-effort redacts any outputs from stdout/stderr.
101+
func WithOutputRedactor(f func(string) string) CommandOptionFunc {
102+
return func(o *commandOpts) {
103+
o.redactor = f
104+
}
105+
}
106+
78107
const gitCommandDefaultTimeout = time.Minute
79108

80109
func (g *gitCLIBackend) NewCommand(ctx context.Context, optFns ...CommandOptionFunc) (_ io.ReadCloser, err error) {
@@ -116,10 +145,23 @@ func (g *gitCLIBackend) NewCommand(ctx context.Context, optFns ...CommandOptionF
116145
g.dir.Set(cmd)
117146

118147
stderr, stderrBuf := stderrBuffer()
119-
cmd.Stderr = stderr
148+
stderrWriter := stderr
149+
if opts.stderr != nil {
150+
stderrWriter = io.MultiWriter(stderrBuf, opts.stderr)
151+
}
152+
cmd.Stderr = stderrWriter
153+
154+
if cmd.Env == nil {
155+
cmd.Env = os.Environ()
156+
}
157+
cmd.Env = append(cmd.Env, opts.addtlEnv...)
120158

121159
wrappedCmd := g.rcf.WrapWithRepoName(ctx, logger, g.repoName, cmd)
122160

161+
if opts.redactor != nil {
162+
wrappedCmd = wrappedCmd.WithRedactorFunc(opts.redactor)
163+
}
164+
123165
stdout, err := cmd.StdoutPipe()
124166
if err != nil {
125167
cancel()
@@ -151,6 +193,7 @@ func (g *gitCLIBackend) NewCommand(ctx context.Context, optFns ...CommandOptionF
151193
logger: logger,
152194
git: g,
153195
tr: tr,
196+
redactor: opts.redactor,
154197
}, nil
155198
}
156199

@@ -201,6 +244,7 @@ type cmdReader struct {
201244
closed bool
202245
tr trace.Trace
203246
err error
247+
redactor func(string) string
204248
}
205249

206250
func (rc *cmdReader) Read(p []byte) (n int, err error) {
@@ -242,10 +286,14 @@ func (rc *cmdReader) waitCmd() (err error) {
242286
rc.err = rc.cmd.Wait()
243287

244288
if rc.err != nil {
245-
if checkMaybeCorruptRepo(rc.ctx, rc.logger, rc.git, rc.repoName, rc.stderr.String()) {
246-
rc.err = common.ErrRepoCorrupted{Reason: rc.stderr.String()}
289+
redactedStderr := rc.stderr.String()
290+
if rc.redactor != nil {
291+
redactedStderr = rc.redactor(rc.stderr.String())
292+
}
293+
if checkMaybeCorruptRepo(rc.ctx, rc.logger, rc.git, rc.repoName, redactedStderr) {
294+
rc.err = common.ErrRepoCorrupted{Reason: redactedStderr}
247295
} else {
248-
rc.err = commandFailedError(rc.ctx, err, rc.cmd, rc.stderr.Bytes())
296+
rc.err = commandFailedError(rc.ctx, err, rc.cmd, []byte(redactedStderr))
249297
}
250298
}
251299

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
package gitcli
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"context"
7+
"io"
8+
9+
"github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/git"
10+
"github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/urlredactor"
11+
"github.com/sourcegraph/sourcegraph/internal/api"
12+
"github.com/sourcegraph/sourcegraph/lib/errors"
13+
)
14+
15+
func (g *gitCLIBackend) Fetch(ctx context.Context, opt git.FetchOptions) (git.RefUpdateIterator, io.Reader, error) {
16+
redactor := urlredactor.New(opt.RemoteURL)
17+
18+
args, env := buildFetchArgs(opt)
19+
// see issue #7322: skip LFS content in repositories with Git LFS configured.
20+
env = append(env, "GIT_LFS_SKIP_SMUDGE=1")
21+
22+
stderrR, stderrW := io.Pipe()
23+
r, err := g.NewCommand(ctx,
24+
WithArguments(args...),
25+
WithEnv(env...),
26+
WithOutputRedactor(redactor.Redact),
27+
WithStderr(stderrW),
28+
)
29+
if err != nil {
30+
return nil, nil, err
31+
}
32+
33+
return &refUpdateIterator{
34+
stdout: r,
35+
onCancel: func() error {
36+
return errors.Append(stderrR.Close(), stderrW.Close())
37+
},
38+
sc: bufio.NewScanner(r),
39+
}, stderrR, nil
40+
}
41+
42+
// refUpdateIterator reads ref updates line by line from the stdout of a
// `git fetch --porcelain` command.
type refUpdateIterator struct {
	// stdout is the command's output stream; it is closed in Close.
	stdout io.ReadCloser
	// sc scans stdout line by line.
	sc *bufio.Scanner
	// onCancel is invoked in Close, before stdout is closed, to release
	// additional resources tied to the command.
	onCancel func() error
}
47+
48+
func (i *refUpdateIterator) Next() (git.RefUpdate, error) {
49+
for i.sc.Scan() {
50+
if len(i.sc.Bytes()) == 0 {
51+
continue
52+
}
53+
return parseRefUpdateLine(i.sc.Bytes())
54+
}
55+
56+
if err := i.sc.Err(); err != nil {
57+
return git.RefUpdate{}, err
58+
}
59+
60+
return git.RefUpdate{}, io.EOF
61+
}
62+
63+
func (i *refUpdateIterator) Close() error {
64+
cancelErr := i.onCancel()
65+
err := i.stdout.Close()
66+
if cancelErr != nil {
67+
err = errors.Append(err, cancelErr)
68+
}
69+
return err
70+
}
71+
72+
func buildFetchArgs(opt git.FetchOptions) (args, env []string) {
73+
env = []string{
74+
// disable password prompt
75+
"GIT_ASKPASS=true",
76+
// Suppress asking to add SSH host key to known_hosts (which will hang because
77+
// the command is non-interactive).
78+
//
79+
// And set a timeout to avoid indefinite hangs if the server is unreachable.
80+
"GIT_SSH_COMMAND=ssh -o BatchMode=yes -o ConnectTimeout=30",
81+
// Identify HTTP requests with a user agent. Please keep the git/ prefix because GitHub breaks the protocol v2
82+
// negotiation of clone URLs without a `.git` suffix (which we use) without it. Don't ask.
83+
"GIT_HTTP_USER_AGENT=git/Sourcegraph-Bot",
84+
}
85+
86+
if opt.TLSConfig.SSLNoVerify {
87+
env = append(env, "GIT_SSL_NO_VERIFY=true")
88+
}
89+
if opt.TLSConfig.SSLCAInfo != "" {
90+
env = append(env, "GIT_SSL_CAINFO="+opt.TLSConfig.SSLCAInfo)
91+
}
92+
93+
// If we have creds in the URL, pass them in via the credHelper instead of
94+
// as part of the URL, because args are visible in `ps` output, leaking the
95+
// credentials easily.
96+
remoteURLArg := opt.RemoteURL.String()
97+
credentialHelper := []string{}
98+
password, ok := opt.RemoteURL.User.Password()
99+
if ok && !opt.RemoteURL.IsSSH() {
100+
// Remove the user section from the remoteURL so that git consults credential
101+
// helpers for the username/password.
102+
ru := *opt.RemoteURL
103+
ru.User = nil
104+
remoteURLArg = ru.String()
105+
106+
// Next up, add out credential helper.
107+
// Note: We add an ADDITIONAL credential helper here, the previous
108+
// one is just unsetting any existing ones.
109+
credentialHelper = []string{"-c", "credential.helper=!f() { echo \"username=$GIT_SG_USERNAME\npassword=$GIT_SG_PASSWORD\"; }; f"}
110+
env = append(env,
111+
"GIT_SG_USERNAME="+opt.RemoteURL.User.Username(),
112+
"GIT_SG_PASSWORD="+password,
113+
)
114+
}
115+
116+
args = []string{
117+
// Unset credential helper because the command is non-interactive.
118+
// Even when we pass a second credential helper for HTTP credentials,
119+
// we will need this. Otherwise, the original credential helper will be used
120+
// as well.
121+
"-c", "credential.helper=",
122+
}
123+
args = append(args, credentialHelper...)
124+
args = append(args,
125+
"-c", "protocol.version=2",
126+
"fetch",
127+
"--progress",
128+
"--prune",
129+
"--porcelain",
130+
remoteURLArg,
131+
)
132+
133+
return args, env
134+
}
135+
136+
func parseRefUpdateLine(line []byte) (u git.RefUpdate, _ error) {
137+
line = bytes.TrimSpace(line)
138+
// format:
139+
// <flag> <old-object-id> <new-object-id> <local-reference>
140+
if len(line) == 0 {
141+
return git.RefUpdate{}, errors.New("empty git ref update output")
142+
}
143+
if line[0] == ' ' {
144+
u.Type = git.RefUpdateTypeFastForwardUpdate
145+
line[0] = 'x'
146+
}
147+
parts := bytes.Fields(line)
148+
if len(parts) != 4 {
149+
return git.RefUpdate{}, errors.Newf("invalid ref update format, expected exactly 4 fields %q", line)
150+
}
151+
152+
if line[0] != 'x' {
153+
switch git.RefUpdateType(line[0]) {
154+
case git.RefUpdateTypeFastForwardUpdate:
155+
u.Type = git.RefUpdateTypeFastForwardUpdate
156+
case git.RefUpdateTypeForcedUpdate:
157+
u.Type = git.RefUpdateTypeForcedUpdate
158+
case git.RefUpdateTypePruned:
159+
u.Type = git.RefUpdateTypePruned
160+
case git.RefUpdateTypeTagUpdate:
161+
u.Type = git.RefUpdateTypeTagUpdate
162+
case git.RefUpdateTypeNewRef:
163+
u.Type = git.RefUpdateTypeNewRef
164+
case git.RefUpdateTypeFailed:
165+
u.Type = git.RefUpdateTypeFailed
166+
case git.RefUpdateTypeUnchanged:
167+
u.Type = git.RefUpdateTypeUnchanged
168+
default:
169+
return git.RefUpdate{}, errors.Newf("invalid ref update type %q", line[0])
170+
}
171+
}
172+
u.OldSHA = api.CommitID(parts[1])
173+
u.NewSHA = api.CommitID(parts[2])
174+
u.LocalReference = string(parts[3])
175+
176+
return u, nil
177+
}

cmd/gitserver/internal/git/iface.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/sourcegraph/sourcegraph/internal/api"
99
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"
10+
"github.com/sourcegraph/sourcegraph/internal/vcs"
1011
)
1112

1213
// GitBackend is the interface through which operations on a git repository can
@@ -90,6 +91,12 @@ type GitBackend interface {
9091
// Aggregations are done by email address.
9192
// If range does not exist, a RevisionNotFoundError is returned.
9293
ContributorCounts(ctx context.Context, opt ContributorCountsOpts) ([]*gitdomain.ContributorCount, error)
94+
// Fetch fetches the configured refspecs from the remote repository using the
95+
// given URL.
96+
// The returned iterator can be used to read all the ref updates in a structured
97+
// way.
98+
// The returned Reader can be used to read the progress of the fetch operation.
99+
Fetch(ctx context.Context, opt FetchOptions) (_ RefUpdateIterator, progress io.Reader, _ error)
93100

94101
// Exec is a temporary helper to run arbitrary git commands from the exec endpoint.
95102
// No new usages of it should be introduced and once the migration is done we will
@@ -193,3 +200,65 @@ type ContributorCountsOpts struct {
193200
// (e.g., "foo/bar/").
194201
Path string
195202
}
203+
204+
// FetchOptions are options for the Fetch method. All options must be specified.
205+
type FetchOptions struct {
206+
// RemoteURL is the URL of the remote to fetch from. It may contain credentials
207+
// in the User field.
208+
RemoteURL *vcs.URL
209+
// Refspecs specifies which refs to fetch and update.
210+
// Example format: {"+refs/heads/*:refs/heads/*", "+refs/tags/*:refs/tags/*"}.
211+
Refspecs []string
212+
// TLSConfig, if set, contains additional information about certificate settings
213+
// to pass to git.
214+
TLSConfig GitTLSConfig
215+
}
216+
217+
// GitTLSConfig contains certificate related settings that are passed to git
// for operations over HTTPS.
type GitTLSConfig struct {
	// SSLNoVerify disables verification of the SSL certificate when fetching
	// or pushing over HTTPS.
	//
	// https://git-scm.com/docs/git-config#Documentation/git-config.txt-httpsslVerify
	SSLNoVerify bool

	// SSLCAInfo is the path of a file containing the certificates to verify
	// the peer with when fetching or pushing over HTTPS.
	//
	// https://git-scm.com/docs/git-config#Documentation/git-config.txt-httpsslCAInfo
	SSLCAInfo string
}
230+
231+
// RefUpdateIterator is an interface that allows iterating over a set of RefUpdates.
232+
type RefUpdateIterator interface {
233+
// Next returns the next RefUpdate.
234+
Next() (RefUpdate, error)
235+
// Close releases resources associated with the iterator.
236+
Close() error
237+
}
238+
239+
type RefUpdate struct {
240+
Type RefUpdateType
241+
OldSHA api.CommitID
242+
NewSHA api.CommitID
243+
LocalReference string
244+
}
245+
246+
// RefUpdateType describes the kind of update that was made to a reference.
// Each value is a single flag character.
type RefUpdateType byte

const (
	// RefUpdateTypeFastForwardUpdate marks a successfully fetched fast-forward.
	RefUpdateTypeFastForwardUpdate = RefUpdateType(' ')
	// RefUpdateTypeForcedUpdate marks a successful forced update.
	RefUpdateTypeForcedUpdate = RefUpdateType('+')
	// RefUpdateTypePruned marks a successfully pruned ref.
	RefUpdateTypePruned = RefUpdateType('-')
	// RefUpdateTypeTagUpdate marks a successful tag update.
	RefUpdateTypeTagUpdate = RefUpdateType('t')
	// RefUpdateTypeNewRef marks a successfully fetched new ref.
	RefUpdateTypeNewRef = RefUpdateType('*')
	// RefUpdateTypeFailed marks a ref that was rejected or failed to update.
	RefUpdateTypeFailed = RefUpdateType('!')
	// RefUpdateTypeUnchanged marks a ref that was up to date and did not need
	// fetching.
	RefUpdateTypeUnchanged = RefUpdateType('=')
)

0 commit comments

Comments
 (0)