Skip to content

Commit eb4f8c4

Browse files
committed
expression: move checksum validation into expression parsing
Right now our character validation logic is spread all over the place. We have `expression::check_valid_chars` and `checksum::verify_checksum` and also `checksum::Engine::input` which all do the same checks, sharing the public array `expression::VALID_CHARS` which is arguably an implementation detail and shouldn't be exposed. In fact, whenever we parse any Miniscript object (a script, a descriptor, a policy of either type), we are first validating the checksum and then parsing into a tree, and our tree-parsing re-does character validation that was already done as part of the checksum check. This commit moves the checksum error logic into expression, where it belongs, and folds the checksum error variant into the expression parsing error, where it belongs (removing it from the mega-enum at the root). However it leaves the Taproot parsing alone, which is still a bit of a mess and will be addressed in a later PR. Benchmarks show that this significantly slows down expression parsing (as expected, since it is now doing checksumming) but significantly speeds up descriptor parsing (which is no longer doing multiple checks for character validity).
1 parent bb2e658 commit eb4f8c4

File tree

12 files changed

+41
-106
lines changed

12 files changed

+41
-106
lines changed

src/descriptor/bare.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use core::fmt;
1212
use bitcoin::script::{self, PushBytes};
1313
use bitcoin::{Address, Network, ScriptBuf, Weight};
1414

15-
use super::checksum::verify_checksum;
1615
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
1716
use crate::expression::{self, FromTree};
1817
use crate::miniscript::context::{ScriptContext, ScriptContextError};
@@ -186,8 +185,7 @@ impl<Pk: FromStrKey> FromTree for Bare<Pk> {
186185
impl<Pk: FromStrKey> core::str::FromStr for Bare<Pk> {
187186
type Err = Error;
188187
fn from_str(s: &str) -> Result<Self, Self::Err> {
189-
let desc_str = verify_checksum(s)?;
190-
let top = expression::Tree::from_str(desc_str)?;
188+
let top = expression::Tree::from_str(s)?;
191189
Self::from_tree(&top)
192190
}
193191
}
@@ -387,8 +385,7 @@ impl<Pk: FromStrKey> FromTree for Pkh<Pk> {
387385
impl<Pk: FromStrKey> core::str::FromStr for Pkh<Pk> {
388386
type Err = Error;
389387
fn from_str(s: &str) -> Result<Self, Self::Err> {
390-
let desc_str = verify_checksum(s)?;
391-
let top = expression::Tree::from_str(desc_str)?;
388+
let top = expression::Tree::from_str(s)?;
392389
Self::from_tree(&top)
393390
}
394391
}

src/descriptor/checksum.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use core::iter::FromIterator;
1414
use bech32::primitives::checksum::PackedFe32;
1515
use bech32::{Checksum, Fe32};
1616

17-
pub use crate::expression::VALID_CHARS;
1817
use crate::prelude::*;
1918

2019
const CHECKSUM_LENGTH: usize = 8;

src/descriptor/mod.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ use bitcoin::{
2121
};
2222
use sync::Arc;
2323

24-
use self::checksum::verify_checksum;
2524
use crate::miniscript::decode::Terminal;
2625
use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0};
2726
use crate::plan::{AssetProvider, Plan};
@@ -988,8 +987,7 @@ impl<Pk: FromStrKey> FromStr for Descriptor<Pk> {
988987
let desc = if s.starts_with("tr(") {
989988
Ok(Descriptor::Tr(Tr::from_str(s)?))
990989
} else {
991-
let desc_str = verify_checksum(s)?;
992-
let top = expression::Tree::from_str(desc_str)?;
990+
let top = expression::Tree::from_str(s)?;
993991
expression::FromTree::from_tree(&top)
994992
}?;
995993

@@ -1840,7 +1838,7 @@ mod tests {
18401838
($secp: ident,$($desc: expr),*) => {
18411839
$(
18421840
match Descriptor::parse_descriptor($secp, $desc) {
1843-
Err(Error::Checksum(_)) => {},
1841+
Err(Error::ParseTree(crate::ParseTreeError::Checksum(_))) => {},
18441842
Err(e) => panic!("Expected bad checksum for {}, got '{}'", $desc, e),
18451843
_ => panic!("Invalid checksum treated as valid: {}", $desc),
18461844
};

src/descriptor/segwitv0.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use core::fmt;
1010

1111
use bitcoin::{Address, Network, ScriptBuf, Weight};
1212

13-
use super::checksum::verify_checksum;
1413
use super::SortedMultiVec;
1514
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
1615
use crate::expression::{self, FromTree};
@@ -288,8 +287,7 @@ impl<Pk: MiniscriptKey> fmt::Display for Wsh<Pk> {
288287
impl<Pk: FromStrKey> core::str::FromStr for Wsh<Pk> {
289288
type Err = Error;
290289
fn from_str(s: &str) -> Result<Self, Self::Err> {
291-
let desc_str = verify_checksum(s)?;
292-
let top = expression::Tree::from_str(desc_str)?;
290+
let top = expression::Tree::from_str(s)?;
293291
Wsh::<Pk>::from_tree(&top)
294292
}
295293
}
@@ -505,8 +503,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Wpkh<Pk> {
505503
impl<Pk: FromStrKey> core::str::FromStr for Wpkh<Pk> {
506504
type Err = Error;
507505
fn from_str(s: &str) -> Result<Self, Self::Err> {
508-
let desc_str = verify_checksum(s)?;
509-
let top = expression::Tree::from_str(desc_str)?;
506+
let top = expression::Tree::from_str(s)?;
510507
Self::from_tree(&top)
511508
}
512509
}

src/descriptor/sh.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use core::fmt;
1313
use bitcoin::script::PushBytes;
1414
use bitcoin::{script, Address, Network, ScriptBuf, Weight};
1515

16-
use super::checksum::verify_checksum;
1716
use super::{SortedMultiVec, Wpkh, Wsh};
1817
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
1918
use crate::expression::{self, FromTree};
@@ -109,8 +108,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Sh<Pk> {
109108
impl<Pk: FromStrKey> core::str::FromStr for Sh<Pk> {
110109
type Err = Error;
111110
fn from_str(s: &str) -> Result<Self, Self::Err> {
112-
let desc_str = verify_checksum(s)?;
113-
let top = expression::Tree::from_str(desc_str)?;
111+
let top = expression::Tree::from_str(s)?;
114112
Self::from_tree(&top)
115113
}
116114
}

src/descriptor/tr.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,9 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Tr<Pk> {
557557
impl<Pk: FromStrKey> core::str::FromStr for Tr<Pk> {
558558
type Err = Error;
559559
fn from_str(s: &str) -> Result<Self, Self::Err> {
560-
let desc_str = verify_checksum(s)?;
560+
let desc_str = verify_checksum(s)
561+
.map_err(From::from)
562+
.map_err(Error::ParseTree)?;
561563
let top = parse_tr_tree(desc_str)?;
562564
Self::from_tree(&top)
563565
}
@@ -587,8 +589,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Tr<Pk> {
587589

588590
// Helper function to parse string into miniscript tree form
589591
fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
590-
expression::check_valid_chars(s)?;
591-
592592
if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' {
593593
let rest = &s[3..s.len() - 1];
594594
if !rest.contains(',') {

src/expression/error.rs

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,22 @@
44
55
use core::fmt;
66

7+
use crate::descriptor::checksum;
78
use crate::prelude::*;
89
use crate::ThresholdError;
910

1011
/// An error parsing an expression tree.
1112
#[derive(Debug, PartialEq, Eq)]
1213
pub enum ParseTreeError {
14+
/// Error validating the checksum or character set.
15+
Checksum(checksum::Error),
1316
/// Expression tree had depth exceeding our hard cap.
1417
MaxRecursionDepthExceeded {
1518
/// The depth of the tree that was attempted to be parsed.
1619
actual: usize,
1720
/// The maximum depth.
1821
maximum: u32,
1922
},
20-
/// Character occurred which was not part of the valid descriptor character set.
21-
InvalidCharacter {
22-
/// The character in question.
23-
ch: char,
24-
/// Its byte-index into the string.
25-
pos: usize,
26-
},
2723
/// After a close-paren, the only valid next characters are close-parens and commas. Got
2824
/// something else.
2925
ExpectedParenOrComma {
@@ -66,15 +62,17 @@ pub enum ParseTreeError {
6662
},
6763
}
6864

65+
impl From<checksum::Error> for ParseTreeError {
66+
fn from(e: checksum::Error) -> Self { Self::Checksum(e) }
67+
}
68+
6969
impl fmt::Display for ParseTreeError {
7070
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
7171
match self {
72+
ParseTreeError::Checksum(ref e) => e.fmt(f),
7273
ParseTreeError::MaxRecursionDepthExceeded { actual, maximum } => {
7374
write!(f, "maximum recursion depth exceeded (max {}, got {})", maximum, actual)
7475
}
75-
ParseTreeError::InvalidCharacter { ch, pos } => {
76-
write!(f, "character `{}` (position {}) not allowed in descriptor", ch, pos)
77-
}
7876
ParseTreeError::ExpectedParenOrComma { ch, pos } => {
7977
write!(
8078
f,
@@ -103,7 +101,17 @@ impl fmt::Display for ParseTreeError {
103101
}
104102
#[cfg(feature = "std")]
105103
impl std::error::Error for ParseTreeError {
106-
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { None }
104+
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
105+
match self {
106+
ParseTreeError::Checksum(ref e) => Some(e),
107+
ParseTreeError::MaxRecursionDepthExceeded { .. }
108+
| ParseTreeError::ExpectedParenOrComma { .. }
109+
| ParseTreeError::UnmatchedOpenParen { .. }
110+
| ParseTreeError::UnmatchedCloseParen { .. }
111+
| ParseTreeError::MismatchedParens { .. }
112+
| ParseTreeError::TrailingCharacter { .. } => None,
113+
}
114+
}
107115
}
108116

109117
/// Error parsing a threshold expression.

src/expression/mod.rs

Lines changed: 11 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,13 @@ use core::fmt;
99
use core::str::FromStr;
1010

1111
pub use self::error::{ParseThresholdError, ParseTreeError};
12+
use crate::descriptor::checksum::verify_checksum;
1213
use crate::prelude::*;
1314
use crate::{errstr, Error, Threshold, MAX_RECURSION_DEPTH};
1415

1516
/// Allowed characters are descriptor strings.
1617
pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";
1718

18-
/// Map of valid characters in descriptor strings.
19-
#[rustfmt::skip]
20-
pub const VALID_CHARS: [Option<u8>; 128] = [
21-
None, None, None, None, None, None, None, None, None, None, None, None, None,
22-
None, None, None, None, None, None, None, None, None, None, None, None, None,
23-
None, None, None, None, None, None, Some(94), Some(59), Some(92), Some(91),
24-
Some(28), Some(29), Some(50), Some(15), Some(10), Some(11), Some(17), Some(51),
25-
Some(14), Some(52), Some(53), Some(16), Some(0), Some(1), Some(2), Some(3),
26-
Some(4), Some(5), Some(6), Some(7), Some(8), Some(9), Some(27), Some(54),
27-
Some(55), Some(56), Some(57), Some(58), Some(26), Some(82), Some(83),
28-
Some(84), Some(85), Some(86), Some(87), Some(88), Some(89), Some(32), Some(33),
29-
Some(34), Some(35), Some(36), Some(37), Some(38), Some(39), Some(40), Some(41),
30-
Some(42), Some(43), Some(44), Some(45), Some(46), Some(47), Some(48), Some(49),
31-
Some(12), Some(93), Some(13), Some(60), Some(61), Some(90), Some(18), Some(19),
32-
Some(20), Some(21), Some(22), Some(23), Some(24), Some(25), Some(64), Some(65),
33-
Some(66), Some(67), Some(68), Some(69), Some(70), Some(71), Some(72), Some(73),
34-
Some(74), Some(75), Some(76), Some(77), Some(78), Some(79), Some(80), Some(81),
35-
Some(30), Some(62), Some(31), Some(63), None,
36-
];
37-
3819
#[derive(Debug)]
3920
/// A token of the form `x(...)` or `x`
4021
pub struct Tree<'a> {
@@ -145,19 +126,17 @@ impl<'a> Tree<'a> {
145126
Self::from_slice_delim(sl, 0u32, '(')
146127
}
147128

148-
fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<(), ParseTreeError> {
149-
// First, scan through string to make sure it is well-formed.
150-
let mut max_depth = 0;
129+
/// Check that a string is a well-formed expression string, with optional
130+
/// checksum.
131+
///
132+
/// Returns the string with the checksum removed.
133+
fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<&str, ParseTreeError> {
151134
// Do ASCII check first; after this we can use .bytes().enumerate() rather
152135
// than .char_indices(), which is *significantly* faster.
153-
for (pos, ch) in s.char_indices() {
154-
if !(32..128).contains(&u32::from(ch)) {
155-
return Err(ParseTreeError::InvalidCharacter { ch, pos });
156-
}
157-
}
136+
let s = verify_checksum(s)?;
158137

138+
let mut max_depth = 0;
159139
let mut open_paren_stack = Vec::with_capacity(128);
160-
161140
for (pos, ch) in s.bytes().enumerate() {
162141
if ch == open {
163142
open_paren_stack.push((ch, pos));
@@ -232,7 +211,7 @@ impl<'a> Tree<'a> {
232211
});
233212
}
234213

235-
Ok(())
214+
Ok(s)
236215
}
237216

238217
pub(crate) fn from_slice_delim(
@@ -242,9 +221,9 @@ impl<'a> Tree<'a> {
242221
) -> Result<(Tree<'a>, &'a str), Error> {
243222
if depth == 0 {
244223
if delim == '{' {
245-
Self::parse_pre_check(sl, b'{', b'}').map_err(Error::ParseTree)?;
224+
sl = Self::parse_pre_check(sl, b'{', b'}').map_err(Error::ParseTree)?;
246225
} else {
247-
Self::parse_pre_check(sl, b'(', b')').map_err(Error::ParseTree)?;
226+
sl = Self::parse_pre_check(sl, b'(', b')').map_err(Error::ParseTree)?;
248227
}
249228
}
250229

@@ -288,8 +267,6 @@ impl<'a> Tree<'a> {
288267
/// Parses a tree from a string
289268
#[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes.
290269
pub fn from_str(s: &'a str) -> Result<Tree<'a>, Error> {
291-
check_valid_chars(s)?;
292-
293270
let (top, rem) = Tree::from_slice(s)?;
294271
if rem.is_empty() {
295272
Ok(top)
@@ -328,23 +305,6 @@ impl<'a> Tree<'a> {
328305
}
329306
}
330307

331-
/// Filter out non-ASCII because we byte-index strings all over the
332-
/// place and Rust gets very upset when you splinch a string.
333-
pub fn check_valid_chars(s: &str) -> Result<(), Error> {
334-
for ch in s.bytes() {
335-
if !ch.is_ascii() {
336-
return Err(Error::Unprintable(ch));
337-
}
338-
// Index bounds: We know that ch is ASCII, so it is <= 127.
339-
if VALID_CHARS[ch as usize].is_none() {
340-
return Err(Error::Unexpected(
341-
"Only characters in INPUT_CHARSET are allowed".to_string(),
342-
));
343-
}
344-
}
345-
Ok(())
346-
}
347-
348308
/// Parse a string as a u32, for timelocks or thresholds
349309
pub fn parse_num(s: &str) -> Result<u32, Error> {
350310
if s.len() > 1 {
@@ -418,15 +378,6 @@ mod tests {
418378
assert!(parse_num("-6").is_err());
419379
}
420380

421-
#[test]
422-
fn test_valid_char_map() {
423-
let mut valid_chars = [None; 128];
424-
for (i, ch) in super::INPUT_CHARSET.chars().enumerate() {
425-
valid_chars[ch as usize] = Some(i as u8);
426-
}
427-
assert_eq!(valid_chars, super::VALID_CHARS);
428-
}
429-
430381
#[test]
431382
fn parse_tree_basic() {
432383
assert_eq!(Tree::from_str("thresh").unwrap(), leaf("thresh"));

src/lib.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,6 @@ pub enum Error {
492492
Threshold(ThresholdError),
493493
/// Invalid threshold.
494494
ParseThreshold(ParseThresholdError),
495-
/// Checksum error parsing a descriptor.
496-
Checksum(descriptor::checksum::Error),
497495
/// Invalid expression tree.
498496
ParseTree(ParseTreeError),
499497
}
@@ -557,7 +555,6 @@ impl fmt::Display for Error {
557555
Error::RelativeLockTime(ref e) => e.fmt(f),
558556
Error::Threshold(ref e) => e.fmt(f),
559557
Error::ParseThreshold(ref e) => e.fmt(f),
560-
Error::Checksum(ref e) => e.fmt(f),
561558
Error::ParseTree(ref e) => e.fmt(f),
562559
}
563560
}
@@ -609,7 +606,6 @@ impl error::Error for Error {
609606
RelativeLockTime(e) => Some(e),
610607
Threshold(e) => Some(e),
611608
ParseThreshold(e) => Some(e),
612-
Checksum(e) => Some(e),
613609
ParseTree(e) => Some(e),
614610
}
615611
}
@@ -650,11 +646,6 @@ impl From<bitcoin::address::P2shError> for Error {
650646
fn from(e: bitcoin::address::P2shError) -> Error { Error::AddrP2shError(e) }
651647
}
652648

653-
#[doc(hidden)]
654-
impl From<descriptor::checksum::Error> for Error {
655-
fn from(e: descriptor::checksum::Error) -> Error { Error::Checksum(e) }
656-
}
657-
658649
#[doc(hidden)]
659650
#[cfg(feature = "compiler")]
660651
impl From<crate::policy::compiler::CompilerError> for Error {

src/miniscript/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,7 +1315,7 @@ mod tests {
13151315
assert!(Segwitv0Script::from_str_insane("🌏")
13161316
.unwrap_err()
13171317
.to_string()
1318-
.contains("unprintable character"));
1318+
.contains("invalid character"));
13191319
}
13201320

13211321
#[test]

src/policy/concrete.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,8 +835,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Policy<Pk> {
835835
impl<Pk: FromStrKey> str::FromStr for Policy<Pk> {
836836
type Err = Error;
837837
fn from_str(s: &str) -> Result<Policy<Pk>, Error> {
838-
expression::check_valid_chars(s)?;
839-
840838
let tree = expression::Tree::from_str(s)?;
841839
let policy: Policy<Pk> = FromTree::from_tree(&tree)?;
842840
policy.check_timelocks().map_err(Error::ConcretePolicy)?;

src/policy/semantic.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Policy<Pk> {
314314
impl<Pk: FromStrKey> str::FromStr for Policy<Pk> {
315315
type Err = Error;
316316
fn from_str(s: &str) -> Result<Policy<Pk>, Error> {
317-
expression::check_valid_chars(s)?;
318-
319317
let tree = expression::Tree::from_str(s)?;
320318
expression::FromTree::from_tree(&tree)
321319
}

0 commit comments

Comments
 (0)