Skip to content

Commit f5dcafd

Browse files
committed
expression: start tracking parenthesis-type and string position
When we change the expression parser to start parsing both ()s and {}s at once, we will need to know the parenthesis type. To return nice errors we also need to store some position information in the Tree type. Adding these new fields (which need to be pub to make them accessible from descriptor/tr.rs, but which we will later encapsulate better) is mechanical and pretty noisy, so we do it in its own commit to reduce the size of the real "fix Taproot parsing" commit.
1 parent 853a01a commit f5dcafd

File tree

2 files changed

+86
-28
lines changed

2 files changed

+86
-28
lines changed

src/descriptor/tr.rs

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -495,11 +495,11 @@ impl<Pk: FromStrKey> Tr<Pk> {
495495
// Helper function to parse taproot script path
496496
fn parse_tr_script_spend(tree: &expression::Tree,) -> Result<TapTree<Pk>, Error> {
497497
match tree {
498-
expression::Tree { name, args } if !name.is_empty() && args.is_empty() => {
498+
expression::Tree { name, args, .. } if !name.is_empty() && args.is_empty() => {
499499
let script = Miniscript::<Pk, Tap>::from_str(name)?;
500500
Ok(TapTree::Leaf(Arc::new(script)))
501501
}
502-
expression::Tree { name, args } if name.is_empty() && args.len() == 2 => {
502+
expression::Tree { name, args, .. } if name.is_empty() && args.len() == 2 => {
503503
let left = Self::parse_tr_script_spend(&args[0])?;
504504
let right = Self::parse_tr_script_spend(&args[1])?;
505505
Ok(TapTree::combine(left, right))
@@ -597,8 +597,18 @@ fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
597597
if !key.args.is_empty() {
598598
return Err(Error::Unexpected("invalid taproot internal key".to_string()));
599599
}
600-
let internal_key = expression::Tree { name: key.name, args: vec![] };
601-
return Ok(expression::Tree { name: "tr", args: vec![internal_key] });
600+
let internal_key = expression::Tree {
601+
name: key.name,
602+
parens: expression::Parens::None,
603+
children_pos: 0,
604+
args: vec![],
605+
};
606+
return Ok(expression::Tree {
607+
name: "tr",
608+
parens: expression::Parens::Round,
609+
children_pos: 0,
610+
args: vec![internal_key],
611+
});
602612
}
603613
// Refactor this to use the `str::split_once()` method once the compiler version is bumped.
604614
let (key, script) = split_once(rest, ',')
@@ -608,10 +618,20 @@ fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
608618
if !key.args.is_empty() {
609619
return Err(Error::Unexpected("invalid taproot internal key".to_string()));
610620
}
611-
let internal_key = expression::Tree { name: key.name, args: vec![] };
621+
let internal_key = expression::Tree {
622+
name: key.name,
623+
parens: expression::Parens::None,
624+
children_pos: 0,
625+
args: vec![],
626+
};
612627
let tree = expression::Tree::from_slice_delim(script, expression::Delimiter::Taproot)
613628
.map_err(Error::ParseTree)?;
614-
Ok(expression::Tree { name: "tr", args: vec![internal_key, tree] })
629+
Ok(expression::Tree {
630+
name: "tr",
631+
parens: expression::Parens::Round,
632+
children_pos: 0,
633+
args: vec![internal_key, tree],
634+
})
615635
} else {
616636
Err(Error::Unexpected("invalid taproot descriptor".to_string()))
617637
}

src/expression/mod.rs

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVW
2121
pub struct Tree<'a> {
2222
/// The name `x`
2323
pub name: &'a str,
24+
/// Position one past the last character of the node's name. If it has
25+
/// children, the position of the '(' or '{'.
26+
pub children_pos: usize,
27+
/// The type of parentheses surrounding the node's children.
28+
pub parens: Parens,
2429
/// The comma-separated contents of the `(...)`, if any
2530
pub args: Vec<Tree<'a>>,
2631
}
@@ -38,11 +43,17 @@ impl PartialEq for Tree<'_> {
3843
}
3944
}
4045
impl Eq for Tree<'_> {}
41-
// or_b(pk(A),pk(B))
42-
//
43-
// A = musig(musig(B,C),D,E)
44-
// or_b()
45-
// pk(A), pk(B)
46+
47+
/// The type of parentheses surrounding a node's children.
48+
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
49+
pub enum Parens {
50+
/// Node has no children.
51+
None,
52+
/// Round parentheses: `(` and `)`.
53+
Round,
54+
/// Curly braces: `{` and `}`.
55+
Curly,
56+
}
4657

4758
/// Whether to treat `{` and `}` as delimiters when parsing an expression.
4859
#[derive(Copy, Clone, PartialEq, Eq)]
@@ -166,31 +177,45 @@ impl<'a> Tree<'a> {
166177
// Now, knowing it is sane and well-formed, we can easily parse it backward,
167178
// which will yield a post-order right-to-left iterator of its nodes.
168179
let mut stack = Vec::with_capacity(max_depth);
169-
let mut children = None;
180+
let mut children_parens: Option<(Vec<_>, usize, Parens)> = None;
170181
let mut node_name_end = s.len();
171182
let mut tapleaf_depth = 0;
172183
for (pos, ch) in s.bytes().enumerate().rev() {
173184
if ch == cparen {
174185
stack.push(vec![]);
175186
node_name_end = pos;
176187
} else if tapleaf_depth == 0 && ch == b',' {
188+
let (mut args, children_pos, parens) =
189+
children_parens
190+
.take()
191+
.unwrap_or((vec![], node_name_end, Parens::None));
192+
args.reverse();
193+
177194
let top = stack.last_mut().unwrap();
178-
let mut new_tree = Tree {
179-
name: &s[pos + 1..node_name_end],
180-
args: children.take().unwrap_or(vec![]),
181-
};
182-
new_tree.args.reverse();
195+
let new_tree =
196+
Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args };
183197
top.push(new_tree);
184198
node_name_end = pos;
185199
} else if ch == oparen {
200+
let (mut args, children_pos, parens) =
201+
children_parens
202+
.take()
203+
.unwrap_or((vec![], node_name_end, Parens::None));
204+
args.reverse();
205+
186206
let mut top = stack.pop().unwrap();
187-
let mut new_tree = Tree {
188-
name: &s[pos + 1..node_name_end],
189-
args: children.take().unwrap_or(vec![]),
190-
};
191-
new_tree.args.reverse();
207+
let new_tree =
208+
Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args };
192209
top.push(new_tree);
193-
children = Some(top);
210+
children_parens = Some((
211+
top,
212+
pos,
213+
match ch {
214+
b'(' => Parens::Round,
215+
b'{' => Parens::Curly,
216+
_ => unreachable!(),
217+
},
218+
));
194219
node_name_end = pos;
195220
} else if delim == Delimiter::Taproot && ch == b'(' {
196221
tapleaf_depth += 1;
@@ -200,9 +225,12 @@ impl<'a> Tree<'a> {
200225
}
201226

202227
assert_eq!(stack.len(), 0);
203-
let mut children = children.take().unwrap_or(vec![]);
204-
children.reverse();
205-
Ok(Tree { name: &s[..node_name_end], args: children })
228+
let (mut args, children_pos, parens) =
229+
children_parens
230+
.take()
231+
.unwrap_or((vec![], node_name_end, Parens::None));
232+
args.reverse();
233+
Ok(Tree { name: &s[..node_name_end], children_pos, parens, args })
206234
}
207235

208236
/// Parses a tree from a string
@@ -300,9 +328,19 @@ mod tests {
300328
use super::*;
301329

302330
/// Test functions to manually build trees
303-
fn leaf(name: &str) -> Tree { Tree { name, args: vec![] } }
331+
fn leaf(name: &str) -> Tree {
332+
Tree { name, parens: Parens::None, children_pos: name.len(), args: vec![] }
333+
}
334+
335+
fn paren_node<'a>(name: &'a str, mut args: Vec<Tree<'a>>) -> Tree<'a> {
336+
let mut offset = name.len() + 1; // +1 for open paren
337+
for arg in &mut args {
338+
arg.children_pos += offset;
339+
offset += arg.name.len() + 1; // +1 for comma
340+
}
304341

305-
fn paren_node<'a>(name: &'a str, args: Vec<Tree<'a>>) -> Tree<'a> { Tree { name, args } }
342+
Tree { name, parens: Parens::Round, children_pos: name.len(), args }
343+
}
306344

307345
#[test]
308346
fn test_parse_num() {

0 commit comments

Comments
 (0)