|
| 1 | + |
| 2 | + |
// Simple Extensible Binary Meta Language (ebml) reader and writer on a
// cursor model. See the specification here:
// http://www.matroska.org/technical/specs/rfc/index.html
| 6 | +import option::none; |
| 7 | +import option::some; |
| 8 | + |
// An element tag: an id paired with a size.
// NOTE(review): not referenced by any code visible in this file; presumably
// it describes an element header (tag id plus payload size) -- confirm
// before relying on it.
type ebml_tag = rec(uint id, uint size);
| 10 | + |
// A tag together with two positions, `tag_pos` and `data_pos`.
// NOTE(review): not referenced by any code visible in this file; presumably
// the positions are offsets of the tag header and of its payload -- confirm.
type ebml_state = rec(ebml_tag ebml_tag, uint tag_pos, uint data_pos);
| 12 | + |
| 13 | + |
| 14 | +// TODO: When we have module renaming, make "reader" and "writer" separate |
| 15 | +// modules within this file. |
| 16 | + |
| 17 | +// ebml reading |
// A document (or nested sub-document): the backing byte vector plus the
// range of bytes belonging to it. `start` is the first byte of the
// document's contents and `end` is one past the last (readers loop while
// `pos < end`, and new_doc sets `end` to the vector length).
type doc = rec(u8[] data, uint start, uint end);
| 19 | + |
| 20 | +fn vint_at(&u8[] data, uint start) -> tup(uint, uint) { |
| 21 | + auto a = data.(start); |
| 22 | + if (a & 0x80u8 != 0u8) { ret tup(a & 0x7fu8 as uint, start + 1u); } |
| 23 | + if (a & 0x40u8 != 0u8) { |
| 24 | + ret tup((a & 0x3fu8 as uint) << 8u | (data.(start + 1u) as uint), |
| 25 | + start + 2u); |
| 26 | + } else if (a & 0x20u8 != 0u8) { |
| 27 | + ret tup((a & 0x1fu8 as uint) << 16u | |
| 28 | + (data.(start + 1u) as uint) << 8u | |
| 29 | + (data.(start + 2u) as uint), start + 3u); |
| 30 | + } else if (a & 0x10u8 != 0u8) { |
| 31 | + ret tup((a & 0x0fu8 as uint) << 24u | |
| 32 | + (data.(start + 1u) as uint) << 16u | |
| 33 | + (data.(start + 2u) as uint) << 8u | |
| 34 | + (data.(start + 3u) as uint), start + 4u); |
| 35 | + } else { log_err "vint too big"; fail; } |
| 36 | +} |
| 37 | + |
| 38 | +fn new_doc(&u8[] data) -> doc { |
| 39 | + ret rec(data=data, start=0u, end=ivec::len[u8](data)); |
| 40 | +} |
| 41 | + |
| 42 | +fn doc_at(&u8[] data, uint start) -> doc { |
| 43 | + auto elt_tag = vint_at(data, start); |
| 44 | + auto elt_size = vint_at(data, elt_tag._1); |
| 45 | + auto end = elt_size._1 + elt_size._0; |
| 46 | + ret rec(data=data, start=elt_size._1, end=end); |
| 47 | +} |
| 48 | + |
| 49 | +fn maybe_get_doc(doc d, uint tg) -> option::t[doc] { |
| 50 | + auto pos = d.start; |
| 51 | + while (pos < d.end) { |
| 52 | + auto elt_tag = vint_at(d.data, pos); |
| 53 | + auto elt_size = vint_at(d.data, elt_tag._1); |
| 54 | + pos = elt_size._1 + elt_size._0; |
| 55 | + if (elt_tag._0 == tg) { |
| 56 | + ret some[doc](rec(data=d.data, start=elt_size._1, end=pos)); |
| 57 | + } |
| 58 | + } |
| 59 | + ret none[doc]; |
| 60 | +} |
| 61 | + |
| 62 | +fn get_doc(doc d, uint tg) -> doc { |
| 63 | + alt (maybe_get_doc(d, tg)) { |
| 64 | + case (some(?d)) { ret d; } |
| 65 | + case (none) { |
| 66 | + log_err "failed to find block with tag " + uint::to_str(tg, 10u); |
| 67 | + fail; |
| 68 | + } |
| 69 | + } |
| 70 | +} |
| 71 | + |
| 72 | +iter docs(doc d) -> tup(uint, doc) { |
| 73 | + auto pos = d.start; |
| 74 | + while (pos < d.end) { |
| 75 | + auto elt_tag = vint_at(d.data, pos); |
| 76 | + auto elt_size = vint_at(d.data, elt_tag._1); |
| 77 | + pos = elt_size._1 + elt_size._0; |
| 78 | + put tup(elt_tag._0, rec(data=d.data, start=elt_size._1, end=pos)); |
| 79 | + } |
| 80 | +} |
| 81 | + |
| 82 | +iter tagged_docs(doc d, uint tg) -> doc { |
| 83 | + auto pos = d.start; |
| 84 | + while (pos < d.end) { |
| 85 | + auto elt_tag = vint_at(d.data, pos); |
| 86 | + auto elt_size = vint_at(d.data, elt_tag._1); |
| 87 | + pos = elt_size._1 + elt_size._0; |
| 88 | + if (elt_tag._0 == tg) { |
| 89 | + put rec(data=d.data, start=elt_size._1, end=pos); |
| 90 | + } |
| 91 | + } |
| 92 | +} |
| 93 | + |
| 94 | +fn doc_data(doc d) -> u8[] { ret ivec::slice[u8](d.data, d.start, d.end); } |
| 95 | + |
| 96 | +fn be_uint_from_bytes(&u8[] data, uint start, uint size) -> uint { |
| 97 | + auto sz = size; |
| 98 | + assert (sz <= 4u); |
| 99 | + auto val = 0u; |
| 100 | + auto pos = start; |
| 101 | + while (sz > 0u) { |
| 102 | + sz -= 1u; |
| 103 | + val += (data.(pos) as uint) << sz * 8u; |
| 104 | + pos += 1u; |
| 105 | + } |
| 106 | + ret val; |
| 107 | +} |
| 108 | + |
| 109 | +fn doc_as_uint(doc d) -> uint { |
| 110 | + ret be_uint_from_bytes(d.data, d.start, d.end - d.start); |
| 111 | +} |
| 112 | + |
| 113 | + |
| 114 | +// ebml writing |
// State of an ebml writer: the underlying buffer writer plus a stack of
// stream positions. start_tag pushes the position of the size placeholder
// it writes; end_tag pops it again to patch in the real size.
type writer = rec(ioivec::buf_writer writer, mutable uint[] size_positions);
| 116 | + |
| 117 | +fn write_sized_vint(&ioivec::buf_writer w, uint n, uint size) { |
| 118 | + let u8[] buf; |
| 119 | + alt (size) { |
| 120 | + case (1u) { buf = ~[0x80u8 | (n as u8)]; } |
| 121 | + case (2u) { buf = ~[0x40u8 | (n >> 8u as u8), n & 0xffu as u8]; } |
| 122 | + case (3u) { |
| 123 | + buf = |
| 124 | + ~[0x20u8 | (n >> 16u as u8), n >> 8u & 0xffu as u8, |
| 125 | + n & 0xffu as u8]; |
| 126 | + } |
| 127 | + case (4u) { |
| 128 | + buf = |
| 129 | + ~[0x10u8 | (n >> 24u as u8), n >> 16u & 0xffu as u8, |
| 130 | + n >> 8u & 0xffu as u8, n & 0xffu as u8]; |
| 131 | + } |
| 132 | + case (_) { log_err "vint to write too big"; fail; } |
| 133 | + } |
| 134 | + w.write(buf); |
| 135 | +} |
| 136 | + |
| 137 | +fn write_vint(&ioivec::buf_writer w, uint n) { |
| 138 | + if (n < 0x7fu) { write_sized_vint(w, n, 1u); ret; } |
| 139 | + if (n < 0x4000u) { write_sized_vint(w, n, 2u); ret; } |
| 140 | + if (n < 0x200000u) { write_sized_vint(w, n, 3u); ret; } |
| 141 | + if (n < 0x10000000u) { write_sized_vint(w, n, 4u); ret; } |
| 142 | + log_err "vint to write too big"; |
| 143 | + fail; |
| 144 | +} |
| 145 | + |
| 146 | +fn create_writer(&ioivec::buf_writer w) -> writer { |
| 147 | + let uint[] size_positions = ~[]; |
| 148 | + ret rec(writer=w, mutable size_positions=size_positions); |
| 149 | +} |
| 150 | + |
| 151 | + |
| 152 | +// TODO: Provide a function to write the standard ebml header. |
| 153 | +fn start_tag(&writer w, uint tag_id) { |
| 154 | + // Write the tag ID: |
| 155 | + |
| 156 | + write_vint(w.writer, tag_id); |
| 157 | + // Write a placeholder four-byte size. |
| 158 | + |
| 159 | + w.size_positions += ~[w.writer.tell()]; |
| 160 | + let u8[] zeroes = ~[0u8, 0u8, 0u8, 0u8]; |
| 161 | + w.writer.write(zeroes); |
| 162 | +} |
| 163 | + |
| 164 | +fn end_tag(&writer w) { |
| 165 | + auto last_size_pos = ivec::pop[uint](w.size_positions); |
| 166 | + auto cur_pos = w.writer.tell(); |
| 167 | + w.writer.seek(last_size_pos as int, ioivec::seek_set); |
| 168 | + write_sized_vint(w.writer, cur_pos - last_size_pos - 4u, 4u); |
| 169 | + w.writer.seek(cur_pos as int, ioivec::seek_set); |
| 170 | +} |
| 171 | +// TODO: optionally perform "relaxations" on end_tag to more efficiently |
| 172 | +// encode sizes; this is a fixed point iteration |
0 commit comments