Skip to content

Commit fffc5d3

Browse files
committed
rustc: Write out a path index as well
1 parent 1b65a61 commit fffc5d3

File tree

2 files changed

+124
-67
lines changed

2 files changed

+124
-67
lines changed

src/comp/front/creader.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,14 @@ impure fn resolve_path(vec[ast.ident] path, vec[u8] data) -> resolve_result {
268268
auto found = false;
269269
while (ebml.bytes_left(ebml_r) > 0u && !found) {
270270
auto ebml_tag = ebml.peek(ebml_r);
271-
if ((ebml_tag.id == metadata.tag_paths_item) ||
272-
(ebml_tag.id == metadata.tag_paths_mod)) {
271+
if ((ebml_tag.id == metadata.tag_paths_data_item) ||
272+
(ebml_tag.id == metadata.tag_paths_data_mod)) {
273273
ebml.move_to_first_child(ebml_r);
274274
auto did_opt = none[ast.def_id];
275275
auto name_opt = none[ast.ident];
276276
while (ebml.bytes_left(ebml_r) > 0u) {
277277
auto inner_tag = ebml.peek(ebml_r);
278-
if (inner_tag.id == metadata.tag_paths_name) {
278+
if (inner_tag.id == metadata.tag_paths_data_name) {
279279
ebml.move_to_first_child(ebml_r);
280280
auto name_data = ebml.read_data(ebml_r);
281281
ebml.move_to_parent(ebml_r);
@@ -335,8 +335,8 @@ impure fn resolve_path(vec[ast.ident] path, vec[u8] data) -> resolve_result {
335335

336336
impure fn move_to_item(&ebml.reader ebml_r, int item_id) {
337337
ebml.move_to_sibling_with_id(ebml_r, metadata.tag_items);
338-
ebml.move_to_child_with_id(ebml_r, metadata.tag_items_index);
339-
ebml.move_to_child_with_id(ebml_r, metadata.tag_items_index_table);
338+
ebml.move_to_child_with_id(ebml_r, metadata.tag_index);
339+
ebml.move_to_child_with_id(ebml_r, metadata.tag_index_table);
340340
ebml.move_to_first_child(ebml_r);
341341

342342
// Move to the bucket.
@@ -347,11 +347,10 @@ impure fn move_to_item(&ebml.reader ebml_r, int item_id) {
347347
ebml.reset_reader(ebml_r, bucket_pos);
348348

349349
// Search to find the item ID in the bucket.
350-
check (ebml.peek(ebml_r).id == metadata.tag_items_index_buckets_bucket);
350+
check (ebml.peek(ebml_r).id == metadata.tag_index_buckets_bucket);
351351
ebml.move_to_first_child(ebml_r);
352352
while (ebml.bytes_left(ebml_r) > 0u) {
353-
if (ebml.peek(ebml_r).id ==
354-
metadata.tag_items_index_buckets_bucket_elt) {
353+
if (ebml.peek(ebml_r).id == metadata.tag_index_buckets_bucket_elt) {
355354
ebml.move_to_first_child(ebml_r);
356355
auto pos = ebml_r.reader.read_be_uint(4u);
357356
auto this_item_id = ebml_r.reader.read_be_uint(4u) as int;

src/comp/middle/metadata.rs

Lines changed: 117 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,28 @@ import lib.llvm.False;
1919
const uint tag_paths = 0x01u;
2020
const uint tag_items = 0x02u;
2121

22-
const uint tag_paths_name = 0x03u;
23-
const uint tag_paths_item = 0x04u;
24-
const uint tag_paths_mod = 0x05u;
25-
26-
const uint tag_def_id = 0x06u;
27-
28-
const uint tag_items_data = 0x07u;
29-
const uint tag_items_data_item = 0x08u;
30-
const uint tag_items_data_item_kind = 0x09u;
31-
const uint tag_items_data_item_ty_param = 0x0au;
32-
const uint tag_items_data_item_type = 0x0bu;
33-
const uint tag_items_data_item_symbol = 0x0cu;
34-
const uint tag_items_data_item_variant = 0x0du;
35-
const uint tag_items_data_item_tag_id = 0x0eu;
36-
const uint tag_items_data_item_obj_type_id = 0x0fu;
37-
38-
const uint tag_items_index = 0x10u;
39-
const uint tag_items_index_buckets = 0x11u;
40-
const uint tag_items_index_buckets_bucket = 0x12u;
41-
const uint tag_items_index_buckets_bucket_elt = 0x13u;
42-
const uint tag_items_index_table = 0x14u;
22+
const uint tag_paths_data = 0x03u;
23+
const uint tag_paths_data_name = 0x04u;
24+
const uint tag_paths_data_item = 0x05u;
25+
const uint tag_paths_data_mod = 0x06u;
26+
27+
const uint tag_def_id = 0x07u;
28+
29+
const uint tag_items_data = 0x08u;
30+
const uint tag_items_data_item = 0x09u;
31+
const uint tag_items_data_item_kind = 0x0au;
32+
const uint tag_items_data_item_ty_param = 0x0bu;
33+
const uint tag_items_data_item_type = 0x0cu;
34+
const uint tag_items_data_item_symbol = 0x0du;
35+
const uint tag_items_data_item_variant = 0x0eu;
36+
const uint tag_items_data_item_tag_id = 0x0fu;
37+
const uint tag_items_data_item_obj_type_id = 0x10u;
38+
39+
const uint tag_index = 0x11u;
40+
const uint tag_index_buckets = 0x12u;
41+
const uint tag_index_buckets_bucket = 0x13u;
42+
const uint tag_index_buckets_bucket_elt = 0x14u;
43+
const uint tag_index_table = 0x15u;
4344

4445
// Type encoding
4546

@@ -164,7 +165,7 @@ fn C_postr(str s) -> ValueRef {
164165
// Path table encoding
165166

166167
// Writes `name` as a single name element: opens a tag, writes the bytes of
// `name` through the underlying writer, then closes the tag.
// This change switches the tag id from tag_paths_name to tag_paths_data_name.
fn encode_name(&ebml.writer ebml_w, str name) {
167-
ebml.start_tag(ebml_w, tag_paths_name);
168+
ebml.start_tag(ebml_w, tag_paths_data_name);
168169
ebml_w.writer.write(_str.bytes(name));
169170
ebml.end_tag(ebml_w);
170171
}
@@ -177,25 +178,37 @@ fn encode_def_id(&ebml.writer ebml_w, &ast.def_id id) {
177178

178179
// Emits one item element per variant in `variants`; each element holds the
// variant's name followed by its def id.
// NOTE(review): nothing in this function records the variants via
// add_to_index, unlike the items handled in encode_module_item_paths --
// confirm that leaving variants out of the path index is intended.
fn encode_tag_variant_paths(&ebml.writer ebml_w, vec[ast.variant] variants) {
179180
for (ast.variant variant in variants) {
180-
ebml.start_tag(ebml_w, tag_paths_item);
181+
ebml.start_tag(ebml_w, tag_paths_data_item);
181182
encode_name(ebml_w, variant.node.name);
182183
encode_def_id(ebml_w, variant.node.id);
183184
ebml.end_tag(ebml_w);
184185
}
185186
}
186187

188+
// Appends one (key, position) entry to `index`.
//   key:      `path` with `name` appended, joined with "." (e.g. "a.b.name")
//   position: whatever ebml_w.writer.tell() reports at the time of the call
// The visible callers invoke this immediately before ebml.start_tag, so the
// recorded position is where that entry's tag begins.
fn add_to_index(&ebml.writer ebml_w,
189+
vec[str] path,
190+
&mutable vec[tup(str, uint)] index,
191+
str name) {
192+
auto full_path = path + vec(name);
193+
index += vec(tup(_str.connect(full_path, "."), ebml_w.writer.tell()));
194+
}
195+
187196
fn encode_native_module_item_paths(&ebml.writer ebml_w,
188-
&ast.native_mod nmod) {
197+
&ast.native_mod nmod,
198+
vec[str] path,
199+
&mutable vec[tup(str, uint)] index) {
189200
for (@ast.native_item nitem in nmod.items) {
190201
alt (nitem.node) {
191202
case (ast.native_item_ty(?id, ?did)) {
192-
ebml.start_tag(ebml_w, tag_paths_item);
203+
add_to_index(ebml_w, path, index, id);
204+
ebml.start_tag(ebml_w, tag_paths_data_item);
193205
encode_name(ebml_w, id);
194206
encode_def_id(ebml_w, did);
195207
ebml.end_tag(ebml_w);
196208
}
197209
case (ast.native_item_fn(?id, _, _, _, ?did, _)) {
198-
ebml.start_tag(ebml_w, tag_paths_item);
210+
add_to_index(ebml_w, path, index, id);
211+
ebml.start_tag(ebml_w, tag_paths_data_item);
199212
encode_name(ebml_w, id);
200213
encode_def_id(ebml_w, did);
201214
ebml.end_tag(ebml_w);
@@ -204,51 +217,62 @@ fn encode_native_module_item_paths(&ebml.writer ebml_w,
204217
}
205218
}
206219

207-
fn encode_module_item_paths(&ebml.writer ebml_w, &ast._mod module) {
220+
fn encode_module_item_paths(&ebml.writer ebml_w,
221+
&ast._mod module,
222+
vec[str] path,
223+
&mutable vec[tup(str, uint)] index) {
208224
// TODO: only encode exported items
209225
for (@ast.item it in module.items) {
210226
alt (it.node) {
211227
case (ast.item_const(?id, _, ?tps, ?did, ?ann)) {
212-
ebml.start_tag(ebml_w, tag_paths_item);
228+
add_to_index(ebml_w, path, index, id);
229+
ebml.start_tag(ebml_w, tag_paths_data_item);
213230
encode_name(ebml_w, id);
214231
encode_def_id(ebml_w, did);
215232
ebml.end_tag(ebml_w);
216233
}
217234
case (ast.item_fn(?id, _, ?tps, ?did, ?ann)) {
218-
ebml.start_tag(ebml_w, tag_paths_item);
235+
add_to_index(ebml_w, path, index, id);
236+
ebml.start_tag(ebml_w, tag_paths_data_item);
219237
encode_name(ebml_w, id);
220238
encode_def_id(ebml_w, did);
221239
ebml.end_tag(ebml_w);
222240
}
223241
case (ast.item_mod(?id, ?_mod, ?did)) {
224-
ebml.start_tag(ebml_w, tag_paths_mod);
242+
add_to_index(ebml_w, path, index, id);
243+
ebml.start_tag(ebml_w, tag_paths_data_mod);
225244
encode_name(ebml_w, id);
226245
encode_def_id(ebml_w, did);
227-
encode_module_item_paths(ebml_w, _mod);
246+
encode_module_item_paths(ebml_w, _mod, path + vec(id), index);
228247
ebml.end_tag(ebml_w);
229248
}
230249
case (ast.item_native_mod(?id, ?nmod, ?did)) {
231-
ebml.start_tag(ebml_w, tag_paths_mod);
250+
add_to_index(ebml_w, path, index, id);
251+
ebml.start_tag(ebml_w, tag_paths_data_mod);
232252
encode_name(ebml_w, id);
233253
encode_def_id(ebml_w, did);
234-
encode_native_module_item_paths(ebml_w, nmod);
254+
encode_native_module_item_paths(ebml_w, nmod, path + vec(id),
255+
index);
235256
ebml.end_tag(ebml_w);
236257
}
237258
case (ast.item_ty(?id, _, ?tps, ?did, ?ann)) {
238-
ebml.start_tag(ebml_w, tag_paths_item);
259+
add_to_index(ebml_w, path, index, id);
260+
ebml.start_tag(ebml_w, tag_paths_data_item);
239261
encode_name(ebml_w, id);
240262
encode_def_id(ebml_w, did);
241263
ebml.end_tag(ebml_w);
242264
}
243265
case (ast.item_tag(?id, ?variants, ?tps, ?did)) {
244-
ebml.start_tag(ebml_w, tag_paths_item);
266+
add_to_index(ebml_w, path, index, id);
267+
ebml.start_tag(ebml_w, tag_paths_data_item);
245268
encode_name(ebml_w, id);
246269
encode_tag_variant_paths(ebml_w, variants);
247270
encode_def_id(ebml_w, did);
248271
ebml.end_tag(ebml_w);
249272
}
250273
case (ast.item_obj(?id, _, ?tps, ?odid, ?ann)) {
251-
ebml.start_tag(ebml_w, tag_paths_item);
274+
add_to_index(ebml_w, path, index, id);
275+
ebml.start_tag(ebml_w, tag_paths_data_item);
252276
encode_name(ebml_w, id);
253277
encode_def_id(ebml_w, odid.ctor);
254278
encode_obj_type_id(ebml_w, odid.ty);
@@ -258,10 +282,14 @@ fn encode_module_item_paths(&ebml.writer ebml_w, &ast._mod module) {
258282
}
259283
}
260284

261-
// Encodes the paths of the crate's top-level module inside a tag_paths
// element and, after this change, returns the (path, position) entries that
// encode_module_item_paths accumulated into `index` along the way.
// Both the starting path and the starting index are empty.
// NOTE(review): encode_metadata also opens tag_paths around its call to this
// function, so the element ends up nested inside another tag_paths; the new
// tag_paths_data constant is not used anywhere in the visible code. Confirm
// which tag was meant here before relying on the layout.
fn encode_item_paths(&ebml.writer ebml_w, @ast.crate crate) {
285+
fn encode_item_paths(&ebml.writer ebml_w, @ast.crate crate)
286+
-> vec[tup(str, uint)] {
287+
let vec[tup(str, uint)] index = vec();
288+
let vec[str] path = vec();
262289
ebml.start_tag(ebml_w, tag_paths);
263-
encode_module_item_paths(ebml_w, crate.node.module);
290+
encode_module_item_paths(ebml_w, crate.node.module, path, index);
264291
ebml.end_tag(ebml_w);
292+
ret index;
265293
}
266294

267295

@@ -442,51 +470,61 @@ fn encode_info_for_items(@trans.crate_ctxt cx, &ebml.writer ebml_w)
442470
}
443471

444472

445-
// Definition ID indexing
473+
// Path and definition ID indexing
474+
475+
// djb's cdb hashes.
446476

447-
// Hashes a definition number: the value cast to uint, XOR-ed with 177573.
// This change makes the argument a reference, which matches the
// fn(&T) -> uint shape that create_index takes as its hash_fn.
fn hash_def_num(int def_num) -> uint {
477+
fn hash_def_num(&int def_num) -> uint {
448478
ret 177573u ^ (def_num as uint);
449479
}
450480

451-
fn create_index(vec[tup(int, uint)] index) -> vec[vec[tup(int, uint)]] {
452-
let vec[vec[tup(int, uint)]] buckets = vec();
481+
// Hashes a path string byte by byte: starting from 5381, each step computes
// h = ((h << 5) + h) ^ byte, i.e. multiply by 33 and XOR in the byte.
// An empty string hashes to 5381.
fn hash_path(&str s) -> uint {
482+
auto h = 5381u;
483+
for (u8 ch in _str.bytes(s)) {
484+
h = ((h << 5u) + h) ^ (ch as uint);
485+
}
486+
ret h;
487+
}
488+
489+
// Distributes index entries into 256 buckets.
//   index:   (key, position) pairs to distribute
//   hash_fn: maps a key to a uint hash
// First builds 256 empty buckets, then appends every entry to bucket number
// hash_fn(key) % 256, so entries keep their input order within a bucket.
// Returns the vector of buckets. This change makes the function generic
// over the key type T instead of hard-wiring int keys and hash_def_num.
fn create_index[T](vec[tup(T, uint)] index, fn(&T) -> uint hash_fn)
490+
-> vec[vec[tup(T, uint)]] {
491+
let vec[vec[tup(T, uint)]] buckets = vec();
453492
for each (uint i in _uint.range(0u, 256u)) {
454-
let vec[tup(int, uint)] bucket = vec();
493+
let vec[tup(T, uint)] bucket = vec();
455494
buckets += vec(bucket);
456495
}
457496

458-
for (tup(int, uint) elt in index) {
459-
auto h = hash_def_num(elt._0);
497+
for (tup(T, uint) elt in index) {
498+
auto h = hash_fn(elt._0);
460499
buckets.(h % 256u) += vec(elt);
461500
}
462501

463502
ret buckets;
464503
}
465504

466-
impure fn encode_index(&ebml.writer ebml_w, vec[tup(int, uint)] index) {
505+
impure fn encode_index[T](&ebml.writer ebml_w, vec[vec[tup(T, uint)]] buckets,
506+
impure fn(io.writer, &T) write_fn) {
467507
auto writer = io.new_writer_(ebml_w.writer);
468508

469-
auto buckets = create_index(index);
470-
471-
ebml.start_tag(ebml_w, tag_items_index);
509+
ebml.start_tag(ebml_w, tag_index);
472510

473511
let vec[uint] bucket_locs = vec();
474-
ebml.start_tag(ebml_w, tag_items_index_buckets);
475-
for (vec[tup(int, uint)] bucket in buckets) {
512+
ebml.start_tag(ebml_w, tag_index_buckets);
513+
for (vec[tup(T, uint)] bucket in buckets) {
476514
bucket_locs += vec(ebml_w.writer.tell());
477515

478-
ebml.start_tag(ebml_w, tag_items_index_buckets_bucket);
479-
for (tup(int, uint) elt in bucket) {
480-
ebml.start_tag(ebml_w, tag_items_index_buckets_bucket_elt);
516+
ebml.start_tag(ebml_w, tag_index_buckets_bucket);
517+
for (tup(T, uint) elt in bucket) {
518+
ebml.start_tag(ebml_w, tag_index_buckets_bucket_elt);
481519
writer.write_be_uint(elt._1, 4u);
482-
writer.write_be_uint(elt._0 as uint, 4u);
520+
write_fn(writer, elt._0);
483521
ebml.end_tag(ebml_w);
484522
}
485523
ebml.end_tag(ebml_w);
486524
}
487525
ebml.end_tag(ebml_w);
488526

489-
ebml.start_tag(ebml_w, tag_items_index_table);
527+
ebml.start_tag(ebml_w, tag_index_table);
490528
for (uint pos in bucket_locs) {
491529
writer.write_be_uint(pos, 4u);
492530
}
@@ -496,17 +534,37 @@ impure fn encode_index(&ebml.writer ebml_w, vec[tup(int, uint)] index) {
496534
}
497535

498536

537+
// Key writer for string-keyed index entries: forwards `s` to
// writer.write_str. encode_metadata passes it to encode_index[str] as the
// write_fn for path keys.
impure fn write_str(io.writer writer, &str s) {
538+
writer.write_str(s);
539+
}
540+
541+
// Key writer for int-keyed index entries: casts `n` to uint and writes it
// with write_be_uint and a size argument of 4 (presumably a 4-byte
// big-endian value, going by the method name -- confirm against io.writer).
// encode_metadata passes it to encode_index[int] as the write_fn for item ids.
impure fn write_int(io.writer writer, &int n) {
542+
writer.write_be_uint(n as uint, 4u);
543+
}
544+
545+
499546
impure fn encode_metadata(@trans.crate_ctxt cx, @ast.crate crate)
500547
-> ValueRef {
501548
auto string_w = io.string_writer();
502549
auto buf_w = string_w.get_writer().get_buf_writer();
503550
auto ebml_w = ebml.create_writer(buf_w);
504551

505-
encode_item_paths(ebml_w, crate);
552+
// Encode and index the paths.
553+
ebml.start_tag(ebml_w, tag_paths);
554+
auto paths_index = encode_item_paths(ebml_w, crate);
555+
auto str_writer = write_str;
556+
auto path_hasher = hash_path;
557+
auto paths_buckets = create_index[str](paths_index, path_hasher);
558+
encode_index[str](ebml_w, paths_buckets, str_writer);
559+
ebml.end_tag(ebml_w);
506560

561+
// Encode and index the items.
507562
ebml.start_tag(ebml_w, tag_items);
508-
auto index = encode_info_for_items(cx, ebml_w);
509-
encode_index(ebml_w, index);
563+
auto items_index = encode_info_for_items(cx, ebml_w);
564+
auto int_writer = write_int;
565+
auto item_hasher = hash_def_num;
566+
auto items_buckets = create_index[int](items_index, item_hasher);
567+
encode_index[int](ebml_w, items_buckets, int_writer);
510568
ebml.end_tag(ebml_w);
511569

512570
ret C_postr(string_w.get_str());

0 commit comments

Comments
 (0)