Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit e88ca4b

Browse files
committed
format: packfile new interface (wip)
1 parent 49a8238 commit e88ca4b

File tree

12 files changed

+407
-851
lines changed

12 files changed

+407
-851
lines changed

formats/idxfile/encoder.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ func (e *Encoder) encodeEntryField(idx *Idxfile, isHash bool) (int, error) {
8181
} else {
8282
data = ent.CRC32[:]
8383
}
84+
8485
i, err := e.Write(data)
8586
sz += i
8687

formats/packfile/decoder.go

Lines changed: 152 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package packfile
22

33
import (
4+
"bytes"
45
"io"
6+
"os"
57

68
"gopkg.in/src-d/go-git.v4/core"
79
)
@@ -21,37 +23,46 @@ var (
2123
// of objects in the packfile is higher than
2224
// Decoder.MaxObjectsLimit.
2325
ErrMaxObjectsLimitReached = NewError("max. objects limit reached")
24-
2526
// ErrInvalidObject is returned by Decode when an invalid object is
2627
// found in the packfile.
2728
ErrInvalidObject = NewError("invalid git object")
28-
2929
// ErrPackEntryNotFound is returned by Decode when a reference in
3030
// the packfile references an unknown object.
3131
ErrPackEntryNotFound = NewError("can't find a pack entry")
32-
3332
// ErrZLib is returned by Decode when there was an error unzipping
3433
// the packfile contents.
3534
ErrZLib = NewError("zlib reading error")
35+
// ErrDuplicatedObject is returned by Remember if an object appears several
36+
// times in a packfile.
37+
ErrDuplicatedObject = NewError("duplicated object")
38+
// ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object
39+
// to recall cannot be returned.
40+
ErrCannotRecall = NewError("cannot recall object")
3641
)
3742

3843
// Decoder reads and decodes packfiles from an input stream.
3944
type Decoder struct {
40-
p *Parser
41-
s core.ObjectStorage
45+
p *Parser
46+
s core.ObjectStorage
47+
seeker io.Seeker
48+
offsetToObject map[int64]core.Object
49+
hashToOffset map[core.Hash]int64
4250
}
4351

4452
// NewDecoder returns a new Decoder that reads from the parser p, using s as object storage; seeker is optional and may be nil.
45-
func NewDecoder(r ReadRecaller, s core.ObjectStorage) *Decoder {
53+
func NewDecoder(s core.ObjectStorage, p *Parser, seeker io.Seeker) *Decoder {
4654
return &Decoder{
47-
p: NewParser(r),
48-
s: s,
55+
p: p,
56+
s: s,
57+
seeker: seeker,
58+
offsetToObject: make(map[int64]core.Object, 0),
59+
hashToOffset: make(map[core.Hash]int64, 0),
4960
}
5061
}
5162

5263
// Decode reads a packfile and stores it in the value pointed to by s.
5364
func (d *Decoder) Decode() error {
54-
count, err := d.p.ReadHeader()
65+
_, count, err := d.p.Header()
5566
if err != nil {
5667
return err
5768
}
@@ -74,21 +85,7 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
7485
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
7586
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
7687
for i := 0; i < int(count); i++ {
77-
start, err := d.p.Offset()
78-
if err != nil {
79-
return err
80-
}
81-
82-
obj := d.s.NewObject()
83-
if err := d.p.FillObject(obj); err != nil {
84-
if err == io.EOF {
85-
break
86-
}
87-
88-
return err
89-
}
90-
91-
err = d.p.Remember(start, obj)
88+
obj, err := d.readObject()
9289
if err != nil {
9390
return err
9491
}
@@ -101,3 +98,134 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
10198

10299
return nil
103100
}
101+
102+
func (d *Decoder) readObject() (core.Object, error) {
103+
h, err := d.p.NextObjectHeader()
104+
if err != nil {
105+
return nil, err
106+
}
107+
108+
obj := d.s.NewObject()
109+
obj.SetSize(h.Length)
110+
obj.SetType(h.Type)
111+
112+
switch h.Type {
113+
case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject:
114+
err = d.fillRegularObjectContent(obj)
115+
case core.REFDeltaObject:
116+
err = d.fillREFDeltaObjectContent(obj, h.Reference)
117+
case core.OFSDeltaObject:
118+
err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference)
119+
default:
120+
err = ErrInvalidObject.AddDetails("type %q", h.Type)
121+
}
122+
123+
return obj, d.remember(h.Offset, obj)
124+
}
125+
126+
func (d *Decoder) fillRegularObjectContent(obj core.Object) error {
127+
w, err := obj.Writer()
128+
if err != nil {
129+
return err
130+
}
131+
132+
_, err = d.p.NextObject(w)
133+
return err
134+
}
135+
136+
func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) error {
137+
base, err := d.recallByHash(ref)
138+
if err != nil {
139+
return err
140+
}
141+
obj.SetType(base.Type())
142+
if err := d.readAndApplyDelta(obj, base); err != nil {
143+
return err
144+
}
145+
146+
return nil
147+
}
148+
149+
func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error {
150+
base, err := d.recallByOffset(offset)
151+
if err != nil {
152+
return err
153+
}
154+
155+
obj.SetType(base.Type())
156+
if err := d.readAndApplyDelta(obj, base); err != nil {
157+
return err
158+
}
159+
160+
return nil
161+
}
162+
163+
// ReadAndApplyDelta reads and apply the base patched with the contents
164+
// of a zlib compressed diff data in the delta portion of an object
165+
// entry in the packfile.
166+
func (d *Decoder) readAndApplyDelta(target, base core.Object) error {
167+
buf := bytes.NewBuffer(nil)
168+
if _, err := d.p.NextObject(buf); err != nil {
169+
return err
170+
}
171+
172+
return ApplyDelta(target, base, buf.Bytes())
173+
}
174+
175+
// Remember stores the offset of the object and its hash, but not the
176+
// object itself. This implementation does not check for already stored
177+
// offsets, as it is too expensive to build this information from an
178+
// index every time a get operation is performed on the SeekableReadRecaller.
179+
func (r *Decoder) remember(o int64, obj core.Object) error {
180+
h := obj.Hash()
181+
r.hashToOffset[h] = o
182+
r.offsetToObject[o] = obj
183+
return nil
184+
}
185+
186+
// RecallByHash returns the object for a given hash by looking for it again in
187+
// the io.ReadeSeerker.
188+
func (r *Decoder) recallByHash(h core.Hash) (core.Object, error) {
189+
o, ok := r.hashToOffset[h]
190+
if !ok {
191+
return nil, ErrCannotRecall.AddDetails("hash not found: %s", h)
192+
}
193+
194+
return r.recallByOffset(o)
195+
}
196+
197+
// RecallByOffset returns the object for a given offset by looking for it again in
198+
// the io.ReadeSeerker. For efficiency reasons, this method always find objects by
199+
// offset, even if they have not been remembered or if they have been forgetted.
200+
func (r *Decoder) recallByOffset(o int64) (obj core.Object, err error) {
201+
obj, ok := r.offsetToObject[o]
202+
if ok {
203+
return obj, nil
204+
}
205+
206+
if !ok && r.seeker == nil {
207+
return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o)
208+
}
209+
210+
// remember current offset
211+
beforeJump, err := r.seeker.Seek(0, os.SEEK_CUR)
212+
if err != nil {
213+
return nil, err
214+
}
215+
216+
defer func() {
217+
// jump back
218+
_, seekErr := r.seeker.Seek(beforeJump, os.SEEK_SET)
219+
if err == nil {
220+
err = seekErr
221+
}
222+
}()
223+
224+
// jump to requested offset
225+
_, err = r.seeker.Seek(o, os.SEEK_SET)
226+
if err != nil {
227+
return nil, err
228+
}
229+
230+
return r.readObject()
231+
}

formats/packfile/decoder_test.go

Lines changed: 2 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,12 @@ package packfile
33
import (
44
"bytes"
55
"encoding/base64"
6-
"fmt"
76
"os"
8-
"runtime"
97
"testing"
10-
"time"
118

129
"gopkg.in/src-d/go-git.v4/core"
1310
"gopkg.in/src-d/go-git.v4/storage/memory"
1411

15-
"github.com/dustin/go-humanize"
1612
. "gopkg.in/check.v1"
1713
)
1814

@@ -27,9 +23,8 @@ var packFileWithEmptyObjects = "UEFDSwAAAAIAAAALnw54nKXMQWoDMQxA0b1PoX2hSLIm44FS
2723
func (s *ReaderSuite) TestReadPackfile(c *C) {
2824
data, _ := base64.StdEncoding.DecodeString(packFileWithEmptyObjects)
2925
f := bytes.NewReader(data)
30-
r := NewStream(f)
3126
sto := memory.NewStorage()
32-
d := NewDecoder(r, sto.ObjectStorage())
27+
d := NewDecoder(sto.ObjectStorage(), NewParser(f), nil)
3328

3429
err := d.Decode()
3530
c.Assert(err, IsNil)
@@ -60,9 +55,8 @@ func (s *ReaderSuite) TestReadPackfileREFDelta(c *C) {
6055
func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string, format Format) {
6156
f, err := os.Open(file)
6257
c.Assert(err, IsNil)
63-
r := NewSeekable(f)
6458
sto := memory.NewStorage()
65-
d := NewDecoder(r, sto.ObjectStorage())
59+
d := NewDecoder(sto.ObjectStorage(), NewParser(f), f)
6660

6761
err = d.Decode()
6862
c.Assert(err, IsNil)
@@ -109,72 +103,3 @@ func AssertObjects(c *C, s *memory.Storage, expects []string) {
109103
c.Assert(obt.Hash().String(), Equals, exp)
110104
}
111105
}
112-
113-
func (s *ReaderSuite) BenchmarkFixtureRef(c *C) {
114-
for i := 0; i < c.N; i++ {
115-
readFromFile(c, "fixtures/git-fixture.ref-delta", REFDeltaFormat)
116-
}
117-
}
118-
119-
func (s *ReaderSuite) BenchmarkFixtureOfs(c *C) {
120-
for i := 0; i < c.N; i++ {
121-
readFromFile(c, "fixtures/git-fixture.ofs-delta", OFSDeltaFormat)
122-
}
123-
}
124-
125-
func (s *ReaderSuite) BenchmarkCandyJS(c *C) {
126-
for i := 0; i < c.N; i++ {
127-
readFromFile(c, "/tmp/go-candyjs", REFDeltaFormat)
128-
}
129-
}
130-
131-
func (s *ReaderSuite) BenchmarkSymfony(c *C) {
132-
for i := 0; i < c.N; i++ {
133-
readFromFile(c, "/tmp/symonfy", REFDeltaFormat)
134-
}
135-
}
136-
137-
func (s *ReaderSuite) BenchmarkGit(c *C) {
138-
for i := 0; i < c.N; i++ {
139-
readFromFile(c, "/tmp/git", REFDeltaFormat)
140-
}
141-
}
142-
143-
func (s *ReaderSuite) _testMemory(c *C, format Format) {
144-
var b, a runtime.MemStats
145-
146-
start := time.Now()
147-
runtime.ReadMemStats(&b)
148-
p := readFromFile(c, "/tmp/symfony.ofs-delta", format)
149-
runtime.ReadMemStats(&a)
150-
151-
fmt.Println("OFS--->")
152-
fmt.Println("Alloc", a.Alloc-b.Alloc, humanize.Bytes(a.Alloc-b.Alloc))
153-
fmt.Println("TotalAlloc", a.TotalAlloc-b.TotalAlloc, humanize.Bytes(a.TotalAlloc-b.TotalAlloc))
154-
fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc))
155-
fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys))
156-
157-
fmt.Println("objects", len(p.Objects))
158-
fmt.Println("time", time.Since(start))
159-
}
160-
161-
func (s *ReaderSuite) _TestMemoryOFS(c *C) {
162-
s._testMemory(c, OFSDeltaFormat)
163-
}
164-
165-
func (s *ReaderSuite) _TestMemoryREF(c *C) {
166-
s._testMemory(c, REFDeltaFormat)
167-
}
168-
169-
func readFromFile(c *C, file string, format Format) *memory.ObjectStorage {
170-
f, err := os.Open(file)
171-
c.Assert(err, IsNil)
172-
r := NewSeekable(f)
173-
sto := memory.NewStorage()
174-
d := NewDecoder(r, sto.ObjectStorage())
175-
176-
err = d.Decode()
177-
c.Assert(err, IsNil)
178-
179-
return sto.ObjectStorage().(*memory.ObjectStorage)
180-
}

0 commit comments

Comments
 (0)