1
1
package packfile
2
2
3
3
import (
4
+ "bytes"
4
5
"io"
6
+ "os"
5
7
6
8
"gopkg.in/src-d/go-git.v4/core"
7
9
)
@@ -21,37 +23,46 @@ var (
21
23
// of objects in the packfile is higher than
22
24
// Decoder.MaxObjectsLimit.
23
25
ErrMaxObjectsLimitReached = NewError ("max. objects limit reached" )
24
-
25
26
// ErrInvalidObject is returned by Decode when an invalid object is
26
27
// found in the packfile.
27
28
ErrInvalidObject = NewError ("invalid git object" )
28
-
29
29
// ErrPackEntryNotFound is returned by Decode when a reference in
30
30
// the packfile references an unknown object.
31
31
ErrPackEntryNotFound = NewError ("can't find a pack entry" )
32
-
33
32
// ErrZLib is returned by Decode when there was an error unzipping
34
33
// the packfile contents.
35
34
ErrZLib = NewError ("zlib reading error" )
35
+ // ErrDuplicatedObject is returned by Remember if an object appears several
36
+ // times in a packfile.
37
+ ErrDuplicatedObject = NewError ("duplicated object" )
38
+ // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object
39
+ // to recall cannot be returned.
40
+ ErrCannotRecall = NewError ("cannot recall object" )
36
41
)
37
42
38
43
// Decoder reads and decodes packfiles from an input stream.
39
44
type Decoder struct {
40
- p * Parser
41
- s core.ObjectStorage
45
+ p * Parser
46
+ s core.ObjectStorage
47
+ seeker io.Seeker
48
+ offsetToObject map [int64 ]core.Object
49
+ hashToOffset map [core.Hash ]int64
42
50
}
43
51
44
52
// NewDecoder returns a new Decoder that reads from r.
45
- func NewDecoder (r ReadRecaller , s core.ObjectStorage ) * Decoder {
53
+ func NewDecoder (s core.ObjectStorage , p * Parser , seeker io. Seeker ) * Decoder {
46
54
return & Decoder {
47
- p : NewParser (r ),
48
- s : s ,
55
+ p : p ,
56
+ s : s ,
57
+ seeker : seeker ,
58
+ offsetToObject : make (map [int64 ]core.Object , 0 ),
59
+ hashToOffset : make (map [core.Hash ]int64 , 0 ),
49
60
}
50
61
}
51
62
52
63
// Decode reads a packfile and stores it in the value pointed to by s.
53
64
func (d * Decoder ) Decode () error {
54
- count , err := d .p .ReadHeader ()
65
+ _ , count , err := d .p .Header ()
55
66
if err != nil {
56
67
return err
57
68
}
@@ -74,21 +85,7 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
74
85
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
75
86
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
76
87
for i := 0 ; i < int (count ); i ++ {
77
- start , err := d .p .Offset ()
78
- if err != nil {
79
- return err
80
- }
81
-
82
- obj := d .s .NewObject ()
83
- if err := d .p .FillObject (obj ); err != nil {
84
- if err == io .EOF {
85
- break
86
- }
87
-
88
- return err
89
- }
90
-
91
- err = d .p .Remember (start , obj )
88
+ obj , err := d .readObject ()
92
89
if err != nil {
93
90
return err
94
91
}
@@ -101,3 +98,134 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
101
98
102
99
return nil
103
100
}
101
+
102
+ func (d * Decoder ) readObject () (core.Object , error ) {
103
+ h , err := d .p .NextObjectHeader ()
104
+ if err != nil {
105
+ return nil , err
106
+ }
107
+
108
+ obj := d .s .NewObject ()
109
+ obj .SetSize (h .Length )
110
+ obj .SetType (h .Type )
111
+
112
+ switch h .Type {
113
+ case core .CommitObject , core .TreeObject , core .BlobObject , core .TagObject :
114
+ err = d .fillRegularObjectContent (obj )
115
+ case core .REFDeltaObject :
116
+ err = d .fillREFDeltaObjectContent (obj , h .Reference )
117
+ case core .OFSDeltaObject :
118
+ err = d .fillOFSDeltaObjectContent (obj , h .OffsetReference )
119
+ default :
120
+ err = ErrInvalidObject .AddDetails ("type %q" , h .Type )
121
+ }
122
+
123
+ return obj , d .remember (h .Offset , obj )
124
+ }
125
+
126
+ func (d * Decoder ) fillRegularObjectContent (obj core.Object ) error {
127
+ w , err := obj .Writer ()
128
+ if err != nil {
129
+ return err
130
+ }
131
+
132
+ _ , err = d .p .NextObject (w )
133
+ return err
134
+ }
135
+
136
+ func (d * Decoder ) fillREFDeltaObjectContent (obj core.Object , ref core.Hash ) error {
137
+ base , err := d .recallByHash (ref )
138
+ if err != nil {
139
+ return err
140
+ }
141
+ obj .SetType (base .Type ())
142
+ if err := d .readAndApplyDelta (obj , base ); err != nil {
143
+ return err
144
+ }
145
+
146
+ return nil
147
+ }
148
+
149
+ func (d * Decoder ) fillOFSDeltaObjectContent (obj core.Object , offset int64 ) error {
150
+ base , err := d .recallByOffset (offset )
151
+ if err != nil {
152
+ return err
153
+ }
154
+
155
+ obj .SetType (base .Type ())
156
+ if err := d .readAndApplyDelta (obj , base ); err != nil {
157
+ return err
158
+ }
159
+
160
+ return nil
161
+ }
162
+
163
+ // ReadAndApplyDelta reads and apply the base patched with the contents
164
+ // of a zlib compressed diff data in the delta portion of an object
165
+ // entry in the packfile.
166
+ func (d * Decoder ) readAndApplyDelta (target , base core.Object ) error {
167
+ buf := bytes .NewBuffer (nil )
168
+ if _ , err := d .p .NextObject (buf ); err != nil {
169
+ return err
170
+ }
171
+
172
+ return ApplyDelta (target , base , buf .Bytes ())
173
+ }
174
+
175
+ // Remember stores the offset of the object and its hash, but not the
176
+ // object itself. This implementation does not check for already stored
177
+ // offsets, as it is too expensive to build this information from an
178
+ // index every time a get operation is performed on the SeekableReadRecaller.
179
+ func (r * Decoder ) remember (o int64 , obj core.Object ) error {
180
+ h := obj .Hash ()
181
+ r .hashToOffset [h ] = o
182
+ r .offsetToObject [o ] = obj
183
+ return nil
184
+ }
185
+
186
+ // RecallByHash returns the object for a given hash by looking for it again in
187
+ // the io.ReadeSeerker.
188
+ func (r * Decoder ) recallByHash (h core.Hash ) (core.Object , error ) {
189
+ o , ok := r .hashToOffset [h ]
190
+ if ! ok {
191
+ return nil , ErrCannotRecall .AddDetails ("hash not found: %s" , h )
192
+ }
193
+
194
+ return r .recallByOffset (o )
195
+ }
196
+
197
+ // RecallByOffset returns the object for a given offset by looking for it again in
198
+ // the io.ReadeSeerker. For efficiency reasons, this method always find objects by
199
+ // offset, even if they have not been remembered or if they have been forgetted.
200
+ func (r * Decoder ) recallByOffset (o int64 ) (obj core.Object , err error ) {
201
+ obj , ok := r .offsetToObject [o ]
202
+ if ok {
203
+ return obj , nil
204
+ }
205
+
206
+ if ! ok && r .seeker == nil {
207
+ return nil , ErrCannotRecall .AddDetails ("no object found at offset %d" , o )
208
+ }
209
+
210
+ // remember current offset
211
+ beforeJump , err := r .seeker .Seek (0 , os .SEEK_CUR )
212
+ if err != nil {
213
+ return nil , err
214
+ }
215
+
216
+ defer func () {
217
+ // jump back
218
+ _ , seekErr := r .seeker .Seek (beforeJump , os .SEEK_SET )
219
+ if err == nil {
220
+ err = seekErr
221
+ }
222
+ }()
223
+
224
+ // jump to requested offset
225
+ _ , err = r .seeker .Seek (o , os .SEEK_SET )
226
+ if err != nil {
227
+ return nil , err
228
+ }
229
+
230
+ return r .readObject ()
231
+ }
0 commit comments