Skip to content

Commit 38a965a

Browse files
committed
metadata: New tag encoding scheme.
EBML tags are encoded as a variable-length unsigned int (vuint), which is clever but causes some tags to be encoded in two bytes even though there are really only about 180 tags or so. Assuming that there won't be, say, over 1,000 tags in the future, we can use a much more efficient encoding scheme. The new scheme should support at most 4,096 tags anyway. This also flattens the scattered tag namespace (did you know that 0xa9 is followed by 0xb0?) and makes room for autoserialized tags in 0x00 through 0x1f.
1 parent ac20ded commit 38a965a

File tree

2 files changed

+175
-149
lines changed

2 files changed

+175
-149
lines changed

src/librbml/lib.rs

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -115,6 +115,7 @@ pub enum EbmlEncoderTag {
115115
#[derive(Debug)]
116116
pub enum Error {
117117
IntTooBig(uint),
118+
InvalidTag(uint),
118119
Expected(String),
119120
IoError(std::old_io::IoError),
120121
ApplicationError(String)
@@ -142,7 +143,7 @@ pub mod reader {
142143
EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64,
143144
EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
144145
EsEnumBody, EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc,
145-
Error, IntTooBig, Expected };
146+
Error, IntTooBig, InvalidTag, Expected };
146147

147148
pub type DecodeResult<T> = Result<T, Error>;
148149
// rbml reading
@@ -165,6 +166,18 @@ pub mod reader {
165166
pub next: uint
166167
}
167168

169+
/// Decodes the tag that starts at `data[start]`.
///
/// A first byte below 0xf0 is itself the tag (one-byte form). A first byte
/// in 0xf1 through 0xff begins the two-byte form: its low four bits become
/// bits 8..11 of the tag and the following byte supplies the low eight bits.
/// On success, `val` holds the decoded tag and `next` is the offset of the
/// first byte after it.
///
/// Returns `Err(InvalidTag(..))` when the first byte is exactly 0xf0.
///
/// `data` is indexed directly, so a `start` past the end of `data`, or a
/// two-byte tag whose second byte is missing, panics instead of returning
/// an error.
pub fn tag_at(data: &[u8], start: uint) -> DecodeResult<Res> {
170+
let v = data[start] as uint;
171+
if v < 0xf0 {
172+
Ok(Res { val: v, next: start + 1 })
173+
} else if v > 0xf0 {
174+
Ok(Res { val: ((v & 0xf) << 8) | data[start + 1] as uint, next: start + 2 })
175+
} else {
176+
// every tag starting with byte 0xf0 is an overlong form, which is prohibited.
177+
Err(InvalidTag(v))
178+
}
179+
}
180+
168181
#[inline(never)]
169182
fn vuint_at_slow(data: &[u8], start: uint) -> DecodeResult<Res> {
170183
let a = data[start];
@@ -238,7 +251,7 @@ pub mod reader {
238251
}
239252

240253
pub fn doc_at<'a>(data: &'a [u8], start: uint) -> DecodeResult<TaggedDoc<'a>> {
241-
let elt_tag = try!(vuint_at(data, start));
254+
let elt_tag = try!(tag_at(data, start));
242255
let elt_size = try!(vuint_at(data, elt_tag.next));
243256
let end = elt_size.next + elt_size.val;
244257
Ok(TaggedDoc {
@@ -250,7 +263,7 @@ pub mod reader {
250263
pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: uint) -> Option<Doc<'a>> {
251264
let mut pos = d.start;
252265
while pos < d.end {
253-
let elt_tag = try_or!(vuint_at(d.data, pos), None);
266+
let elt_tag = try_or!(tag_at(d.data, pos), None);
254267
let elt_size = try_or!(vuint_at(d.data, elt_tag.next), None);
255268
pos = elt_size.next + elt_size.val;
256269
if elt_tag.val == tg {
@@ -276,7 +289,7 @@ pub mod reader {
276289
{
277290
let mut pos = d.start;
278291
while pos < d.end {
279-
let elt_tag = try_or!(vuint_at(d.data, pos), false);
292+
let elt_tag = try_or!(tag_at(d.data, pos), false);
280293
let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
281294
pos = elt_size.next + elt_size.val;
282295
let doc = Doc { data: d.data, start: elt_size.next, end: pos };
@@ -292,7 +305,7 @@ pub mod reader {
292305
{
293306
let mut pos = d.start;
294307
while pos < d.end {
295-
let elt_tag = try_or!(vuint_at(d.data, pos), false);
308+
let elt_tag = try_or!(tag_at(d.data, pos), false);
296309
let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
297310
pos = elt_size.next + elt_size.val;
298311
if elt_tag.val == tg {
@@ -718,6 +731,20 @@ pub mod writer {
718731
size_positions: Vec<uint>,
719732
}
720733

734+
/// Writes the tag `n` to `w`.
///
/// Tags below 0xf0 are written as a single byte. Tags from 0x100 up to (but
/// not including) 0x1000 are written as two bytes: 0xf0 OR-ed with bits 8..11
/// of the tag, followed by the tag's low eight bits.
///
/// Any other value (0xf0 through 0xff, or 0x1000 and above) is rejected
/// with an `OtherIoError` whose description is "invalid tag" and whose detail
/// is the tag formatted with `{}`. A tag in 0xf0 through 0xff would need a
/// lead byte of 0xf0 in the two-byte form, which `reader::tag_at` refuses.
fn write_tag<W: Writer>(w: &mut W, n: uint) -> EncodeResult {
735+
if n < 0xf0 {
736+
w.write_all(&[n as u8])
737+
} else if 0x100 <= n && n < 0x1000 {
738+
w.write_all(&[0xf0 | (n >> 8) as u8, n as u8])
739+
} else {
740+
Err(old_io::IoError {
741+
kind: old_io::OtherIoError,
742+
desc: "invalid tag",
743+
detail: Some(format!("{}", n))
744+
})
745+
}
746+
}
747+
721748
fn write_sized_vuint<W: Writer>(w: &mut W, n: uint, size: uint) -> EncodeResult {
722749
match size {
723750
1 => w.write_all(&[0x80u8 | (n as u8)]),
@@ -766,7 +793,7 @@ pub mod writer {
766793
debug!("Start tag {:?}", tag_id);
767794

768795
// Write the enum ID:
769-
try!(write_vuint(self.writer, tag_id));
796+
try!(write_tag(self.writer, tag_id));
770797

771798
// Write a placeholder four-byte size.
772799
self.size_positions.push(try!(self.writer.tell()) as uint);
@@ -795,7 +822,7 @@ pub mod writer {
795822
}
796823

797824
pub fn wr_tagged_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult {
798-
try!(write_vuint(self.writer, tag_id));
825+
try!(write_tag(self.writer, tag_id));
799826
try!(write_vuint(self.writer, b.len()));
800827
self.writer.write_all(b)
801828
}

0 commit comments

Comments
 (0)