Skip to content

Commit 061b721

Browse files
committed
Rework def ref system to fix various bugs
1 parent 817af72 commit 061b721

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+833
-787
lines changed

src/build_context.rs

Lines changed: 0 additions & 212 deletions
This file was deleted.

src/definitions.rs

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
use std::collections::hash_map::Entry;
2+
3+
use pyo3::prelude::*;
4+
5+
use ahash::AHashMap;
6+
7+
use crate::build_tools::py_err;
8+
9+
// An integer id for the reference
10+
type ReferenceId = usize;
11+
12+
#[derive(Clone, Debug)]
13+
struct Definition<T> {
14+
pub id: ReferenceId,
15+
pub value: Option<T>,
16+
}
17+
18+
#[derive(Clone, Debug)]
19+
pub struct DefinitionsBuilder<T> {
20+
definitions: AHashMap<String, Definition<T>>,
21+
}
22+
23+
impl<T: Clone + std::fmt::Debug> DefinitionsBuilder<T> {
24+
pub fn new() -> Self {
25+
Self {
26+
definitions: AHashMap::new(),
27+
}
28+
}
29+
30+
// Get a ReferenceId for the given reference string.
31+
// This ReferenceId can later be used to retrieve a definition
32+
pub fn get_reference_id(&mut self, reference: &str) -> ReferenceId {
33+
let next_id = self.definitions.len();
34+
// We either need a String copy or two hashmap lookups
35+
// Neither is better than the other
36+
// We opted for the easier outward facing API
37+
match self.definitions.entry(reference.to_string()) {
38+
Entry::Occupied(entry) => entry.get().id,
39+
Entry::Vacant(entry) => {
40+
entry.insert(Definition {
41+
id: next_id,
42+
value: None,
43+
});
44+
next_id
45+
}
46+
}
47+
}
48+
49+
// Add a definition, returning the ReferenceId that maps to it
50+
pub fn add_definition(&mut self, reference: String, value: T) -> PyResult<ReferenceId> {
51+
let next_id = self.definitions.len();
52+
match self.definitions.entry(reference.clone()) {
53+
Entry::Occupied(mut entry) => match entry.get_mut().value.replace(value) {
54+
Some(_) => py_err!(format!("Duplicate ref: `{reference}`")),
55+
None => Ok(entry.get().id),
56+
},
57+
Entry::Vacant(entry) => {
58+
entry.insert(Definition {
59+
id: next_id,
60+
value: Some(value),
61+
});
62+
Ok(next_id)
63+
}
64+
}
65+
}
66+
67+
// Retrieve an item definition using a ReferenceId
68+
// Will raise an error if the definition for that reference does not yet exist
69+
pub fn get_definition(&self, reference_id: ReferenceId) -> PyResult<&T> {
70+
let (reference, def) = match self.definitions.iter().find(|(_, def)| def.id == reference_id) {
71+
Some(v) => v,
72+
None => {
73+
return py_err!(format!(
74+
"Definitions error: no definition for ReferenceId `{reference_id}`"
75+
))
76+
}
77+
};
78+
match def.value.as_ref() {
79+
Some(v) => Ok(v),
80+
None => py_err!(format!(
81+
"Definitions error: attempted to use `{reference}` before it was filled"
82+
)),
83+
}
84+
}
85+
86+
// Consume this Definitions into a vector of items, indexed by each items ReferenceId
87+
pub fn build(self) -> PyResult<Vec<T>> {
88+
// We need to create a vec of defs according to the order in their ids
89+
let mut defs: Vec<(usize, T)> = Vec::new();
90+
for (reference, def) in self.definitions.into_iter() {
91+
match def.value {
92+
None => return py_err!(format!("Definitions error: definition {reference} was never filled")),
93+
Some(v) => defs.push((def.id, v)),
94+
}
95+
}
96+
defs.sort_by_key(|(id, _)| *id);
97+
Ok(defs.into_iter().map(|(_, v)| v).collect())
98+
}
99+
}

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ use pyo3::prelude::*;
1010
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
1111

1212
mod argument_markers;
13-
mod build_context;
1413
mod build_tools;
14+
mod definitions;
1515
mod errors;
1616
mod input;
1717
mod lazy_index_map;

src/recursion_guard.rs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
11
use ahash::AHashSet;
22

3+
type RecursionKey = (
4+
// Identifier for the input object, e.g. the id() of a Python dict
5+
usize,
6+
// Identifier for the node we are traversing, e.g. the validator's id
7+
// Generally only things that can be traversed multiple times, like a definition reference
8+
// need to use the recursion guard, and those things should already have a natural node id
9+
usize,
10+
);
11+
312
/// This is used to avoid cyclic references in input data causing recursive validation and a nasty segmentation fault.
413
/// It's used in `validators/definition` to detect when a reference is reused within itself.
514
#[derive(Debug, Clone, Default)]
615
pub struct RecursionGuard {
7-
ids: Option<AHashSet<usize>>,
16+
ids: Option<AHashSet<RecursionKey>>,
817
// see validators/definition::BACKUP_GUARD_LIMIT for details
918
// depth could be a hashmap {validator_id => depth} but for simplicity and performance it's easier to just
1019
// use one number for all validators
@@ -13,14 +22,14 @@ pub struct RecursionGuard {
1322

1423
impl RecursionGuard {
1524
// insert a new id into the set, return whether the set already had the id in it
16-
pub fn contains_or_insert(&mut self, id: usize) -> bool {
25+
pub fn contains_or_insert(&mut self, obj_id: usize, node_id: usize) -> bool {
1726
match self.ids {
1827
// https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.insert
1928
// "If the set did not have this value present, `true` is returned."
20-
Some(ref mut set) => !set.insert(id),
29+
Some(ref mut set) => !set.insert((obj_id, node_id)),
2130
None => {
22-
let mut set: AHashSet<usize> = AHashSet::with_capacity(10);
23-
set.insert(id);
31+
let mut set: AHashSet<RecursionKey> = AHashSet::with_capacity(10);
32+
set.insert((obj_id, node_id));
2433
self.ids = Some(set);
2534
false
2635
}
@@ -37,10 +46,10 @@ impl RecursionGuard {
3746
self.depth -= 1;
3847
}
3948

40-
pub fn remove(&mut self, id: &usize) {
49+
pub fn remove(&mut self, obj_id: usize, node_id: usize) {
4150
match self.ids {
4251
Some(ref mut set) => {
43-
set.remove(id);
52+
set.remove(&(obj_id, node_id));
4453
}
4554
None => unreachable!(),
4655
};

0 commit comments

Comments
 (0)