33
33
34
34
class _Database (UserDict .DictMixin ):
35
35
36
- def __init__ (self , file , mode ):
36
+ def __init__ (self , filebasename , mode ):
37
37
self ._mode = mode
38
- self ._dirfile = file + _os .extsep + 'dir'
39
- self ._datfile = file + _os .extsep + 'dat'
40
- self ._bakfile = file + _os .extsep + 'bak'
38
+
39
+ # The directory file is a text file. Each line looks like
40
+ # "%r, (%d, %d)\n" % (key, pos, siz)
41
+ # where key is the string key, pos is the offset into the dat
42
+ # file of the associated value's first byte, and siz is the number
43
+ # of bytes in the associated value.
44
+ self ._dirfile = filebasename + _os .extsep + 'dir'
45
+
46
+ # The data file is a binary file pointed into by the directory
47
+ # file, and holds the values associated with keys. Each value
48
+ # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
49
+ # binary 8-bit string value.
50
+ self ._datfile = filebasename + _os .extsep + 'dat'
51
+ self ._bakfile = filebasename + _os .extsep + 'bak'
52
+
53
+ # The index is an in-memory dict, mirroring the directory file.
54
+ self ._index = None # maps keys to (pos, siz) pairs
55
+
41
56
# Mod by Jack: create data file if needed
42
57
try :
43
58
f = _open (self ._datfile , 'r' )
@@ -46,28 +61,36 @@ def __init__(self, file, mode):
46
61
f .close ()
47
62
self ._update ()
48
63
64
+ # Read directory file into the in-memory index dict.
49
65
def _update(self):
    """Rebuild the in-memory index from the directory file.

    self._index is reset to an empty dict and then filled with one
    entry per directory-file line.  Each line is expected to look like
    "%r, (%d, %d)\n" % (key, pos, siz) and is stored as
    self._index[key] = (pos, siz).  If the directory file cannot be
    opened (IOError), the index is simply left empty.
    """
    self._index = {}
    try:
        f = _open(self._dirfile)
    except IOError:
        # No readable directory file: keep the empty index.
        return

    try:
        for line in f:
            line = line.rstrip()
            if not line:
                # A blank line carries no record; handing it to eval()
                # would raise SyntaxError.
                continue
            # NOTE(review): eval() executes whatever expression the
            # directory file contains.  Only open databases whose
            # directory file comes from a trusted source.
            key, pos_and_siz_pair = eval(line)
            self._index[key] = pos_and_siz_pair
    finally:
        # Close the file even if a malformed line makes eval() raise.
        f.close()
62
76
77
+ # Write the index dict to the directory file. The original directory
78
+ # file (if any) is renamed with a .bak extension first. If a .bak
79
+ # file currently exists, it's deleted.
63
80
def _commit(self):
    """Write the in-memory index out to the directory file.

    Any existing .bak file is deleted, then the current directory file
    (if any) is renamed to the .bak name, and finally a fresh directory
    file is written with one "%r, (%d, %d)\n" line per index entry.
    Failures of the unlink and rename steps (_os.error) are ignored,
    since either file may legitimately not exist yet.
    """
    try:
        _os.unlink(self._bakfile)
    except _os.error:
        pass

    try:
        _os.rename(self._dirfile, self._bakfile)
    except _os.error:
        pass

    # NOTE(review): self._mode is passed as the third positional argument
    # of _open; confirm that _open really interprets that argument as a
    # permission mode.
    f = _open(self._dirfile, 'w', self._mode)
    try:
        for key, (pos, siz) in self._index.items():
            f.write("%r, (%d, %d)\n" % (key, pos, siz))
    finally:
        # Release the handle even when a write fails part-way through.
        f.close()
72
95
73
96
def __getitem__ (self , key ):
@@ -78,53 +101,71 @@ def __getitem__(self, key):
78
101
f .close ()
79
102
return dat
80
103
104
+ # Append val to the data file, starting at a _BLOCKSIZE-aligned
105
+ # offset. The data file is first padded with NUL bytes (if needed)
106
+ # to get to an aligned offset. Return pair
107
+ # (starting offset of val, len(val))
81
108
def _addval(self, val):
    """Append val to the data file at a _BLOCKSIZE-aligned offset.

    The data file is first padded with NUL bytes (if needed) so that the
    value starts on a multiple of _BLOCKSIZE.

    Returns the pair (starting offset of val, len(val)).
    """
    f = _open(self._datfile, 'rb+')
    try:
        f.seek(0, 2)  # whence=2: position at the end of the file
        end = int(f.tell())
        # Round the end-of-file offset up to the next block boundary.
        pos = ((end + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
        f.write('\0' * (pos - end))
        f.write(val)
    finally:
        # Close the data file even if seeking or writing raises.
        f.close()
    return (pos, len(val))
95
118
119
+ # Write val to the data file, starting at offset pos. The caller
120
+ # is responsible for ensuring that there's enough room starting at
121
+ # pos to hold val, without overwriting some other value. Return
122
+ # pair (pos, len(val)).
96
123
def _setval(self, pos, val):
    """Write val into the data file starting at offset pos.

    The caller is responsible for ensuring that there is enough room at
    pos to hold val without overwriting some other value.

    Returns the pair (pos, len(val)).
    """
    f = _open(self._datfile, 'rb+')
    try:
        f.seek(pos)
        f.write(val)
    finally:
        # Close the data file even if seeking or writing raises.
        f.close()
    return (pos, len(val))
102
129
103
def _addkey(self, key, pos_and_siz_pair):
    """Record a new key in the index and append it to the directory file.

    key is a new key whose value starts in the data file at offset pos
    and is siz bytes long; pos_and_siz_pair is the pair (pos, siz).  The
    pair is stored in the in-memory index, and a "%r, %r\n" record is
    appended to the directory file.
    """
    self._index[key] = pos_and_siz_pair
    # NOTE(review): self._mode is passed as the third positional argument
    # of _open; confirm that _open really interprets that argument as a
    # permission mode.
    f = _open(self._dirfile, 'a', self._mode)
    try:
        f.write("%r, %r\n" % (key, pos_and_siz_pair))
    finally:
        # Close the directory file even if the write raises.
        f.close()
108
138
109
139
def __setitem__(self, key, val):
    """Store val under key.

    Both key and val must be of exactly the type of '' (plain strings);
    otherwise TypeError is raised.

    A new key gets its value appended to the data file and a record
    appended to the directory file.  For an existing key the value is
    rewritten in place when it needs no more _BLOCKSIZE blocks than the
    old value did; otherwise it is appended to the data file and the
    blocks used by the old value are lost.
    """
    if not type(key) == type('') == type(val):
        # Call form of raise: same exception and message as the statement
        # form, but also valid syntax on newer Python versions.
        raise TypeError("keys and values must be strings")

    if key not in self._index:
        self._addkey(key, self._addval(val))
        return

    # See whether the new value is small enough to fit in the (padded)
    # space currently occupied by the old value.
    pos, siz = self._index[key]
    oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
    newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
    if newblocks <= oldblocks:
        self._index[key] = self._setval(pos, val)
    else:
        # The new value doesn't fit; the old blocks are abandoned.
        self._index[key] = self._addval(val)

    # Note that _index may now be out of synch with the directory file:
    # _setval() and _addval() don't update the directory file.
125
161
126
162
def __delitem__(self, key):
    """Remove key from the index and rewrite the directory file.

    The data-file blocks used by the associated value are not reclaimed.
    A missing key propagates the KeyError raised by the index dict.
    """
    del self._index[key]
    # XXX It's unclear why a _commit() is done here (the code always has).
    # XXX __setitem__ doesn't try to keep the directory file in synch, so
    # XXX either this call is unnecessary or __setitem__ should do the same.
    self._commit()
129
170
130
171
def keys (self ):
0 commit comments