Skip to content

Commit 460400d

Browse files
committed
Merge #773: refactor expression parsing and checksum checking
eb4f8c4 expression: move checksum validation into expression parsing (Andrew Poelstra) bb2e658 expression: add explicit well-formedness check to parser (Andrew Poelstra) 9b03043 expression: add some unit tests (Andrew Poelstra) ad85f92 expression: implement PartialEq/Eq for trees (Andrew Poelstra) b0508d8 checksum: clean up errors and error paths (Andrew Poelstra) Pull request description: As a step toward rewriting the expression parser to be non-recursive, add a pre-parsing well-formedness check, which verifies that an expression is well-formed, uses only the correct character set, and that the checksum (if present) is valid. Along the way, replace the error types returned from the `expression` module with a new more-precise one which can identify the location of parse errors (and identify what the error was in a more correct way). This improves our error reporting and drops many instances of the stringly-typed `BadDescriptor` error. There is currently a bunch of special logic for Taproot descriptors which have the extra characters `{` and `}`. To the extent possible, this PR doesn't touch that logic. It will be addressed in a later PR. The benchmarks show a slight slowdown since we essentially added new validation logic without removing the old logic. Later PRs will improve things. ACKs for top commit: sanket1729: ACK eb4f8c4 Tree-SHA512: cb972e9683aca51f8d18ab61521af8606f47bd1d0bc37dd1ed085101dbc4dd69b382eb05e8e21d2856ac68ebe7d2ca7879ca8a0692dacbea0b22b7b10c9ef987
2 parents ae8f450 + eb4f8c4 commit 460400d

File tree

12 files changed

+465
-120
lines changed

12 files changed

+465
-120
lines changed

src/descriptor/bare.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use core::fmt;
1212
use bitcoin::script::{self, PushBytes};
1313
use bitcoin::{Address, Network, ScriptBuf, Weight};
1414

15-
use super::checksum::verify_checksum;
1615
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
1716
use crate::expression::{self, FromTree};
1817
use crate::miniscript::context::{ScriptContext, ScriptContextError};
@@ -186,8 +185,7 @@ impl<Pk: FromStrKey> FromTree for Bare<Pk> {
186185
impl<Pk: FromStrKey> core::str::FromStr for Bare<Pk> {
187186
type Err = Error;
188187
fn from_str(s: &str) -> Result<Self, Self::Err> {
189-
let desc_str = verify_checksum(s)?;
190-
let top = expression::Tree::from_str(desc_str)?;
188+
let top = expression::Tree::from_str(s)?;
191189
Self::from_tree(&top)
192190
}
193191
}
@@ -387,8 +385,7 @@ impl<Pk: FromStrKey> FromTree for Pkh<Pk> {
387385
impl<Pk: FromStrKey> core::str::FromStr for Pkh<Pk> {
388386
type Err = Error;
389387
fn from_str(s: &str) -> Result<Self, Self::Err> {
390-
let desc_str = verify_checksum(s)?;
391-
let top = expression::Tree::from_str(desc_str)?;
388+
let top = expression::Tree::from_str(s)?;
392389
Self::from_tree(&top)
393390
}
394391
}

src/descriptor/checksum.rs

Lines changed: 110 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,46 +14,117 @@ use core::iter::FromIterator;
1414
use bech32::primitives::checksum::PackedFe32;
1515
use bech32::{Checksum, Fe32};
1616

17-
pub use crate::expression::VALID_CHARS;
1817
use crate::prelude::*;
19-
use crate::Error;
2018

2119
const CHECKSUM_LENGTH: usize = 8;
2220
const CODE_LENGTH: usize = 32767;
2321

24-
/// Compute the checksum of a descriptor.
22+
/// Map of valid characters in descriptor strings.
2523
///
26-
/// Note that this function does not check if the descriptor string is
27-
/// syntactically correct or not. This only computes the checksum.
28-
pub fn desc_checksum(desc: &str) -> Result<String, Error> {
29-
let mut eng = Engine::new();
30-
eng.input(desc)?;
31-
Ok(eng.checksum())
24+
/// The map starts at 32 (space) and runs up to 126 (tilde).
25+
#[rustfmt::skip]
26+
const CHAR_MAP: [u8; 95] = [
27+
94, 59, 92, 91, 28, 29, 50, 15, 10, 11, 17, 51, 14, 52, 53, 16,
28+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 27, 54, 55, 56, 57, 58,
29+
26, 82, 83, 84, 85, 86, 87, 88, 89, 32, 33, 34, 35, 36, 37, 38,
30+
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 12, 93, 13, 60, 61,
31+
90, 18, 19, 20, 21, 22, 23, 24, 25, 64, 65, 66, 67, 68, 69, 70,
32+
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 30, 62, 31, 63,
33+
];
34+
35+
/// Error validating descriptor checksum.
36+
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
37+
pub enum Error {
38+
/// Character outside of descriptor charset.
39+
InvalidCharacter {
40+
/// The character in question.
41+
ch: char,
42+
/// Its position in the string.
43+
pos: usize,
44+
},
45+
/// Checksum had the incorrect length.
46+
InvalidChecksumLength {
47+
/// The length of the checksum in the string.
48+
actual: usize,
49+
/// The length of a valid descriptor checksum.
50+
expected: usize,
51+
},
52+
/// Checksum was invalid.
53+
InvalidChecksum {
54+
/// The checksum in the string.
55+
actual: [char; CHECKSUM_LENGTH],
56+
/// The checksum that should have been there, assuming the string is valid.
57+
expected: [char; CHECKSUM_LENGTH],
58+
},
59+
}
60+
61+
impl fmt::Display for Error {
62+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63+
match *self {
64+
Error::InvalidCharacter { ch, pos } => {
65+
write!(f, "invalid character '{}' (position {})", ch, pos)
66+
}
67+
Error::InvalidChecksumLength { actual, expected } => {
68+
write!(f, "invalid checksum (length {}, expected {})", actual, expected)
69+
}
70+
Error::InvalidChecksum { actual, expected } => {
71+
f.write_str("invalid checksum ")?;
72+
for ch in actual {
73+
ch.fmt(f)?;
74+
}
75+
f.write_str("; expected ")?;
76+
for ch in expected {
77+
ch.fmt(f)?;
78+
}
79+
Ok(())
80+
}
81+
}
82+
}
83+
}
84+
85+
#[cfg(feature = "std")]
86+
impl std::error::Error for Error {
87+
fn cause(&self) -> Option<&dyn std::error::Error> { None }
3288
}
3389

3490
/// Helper function for `FromStr` for various descriptor types.
3591
///
3692
/// Checks and verifies the checksum if it is present and returns the descriptor
3793
/// string without the checksum.
38-
pub(super) fn verify_checksum(s: &str) -> Result<&str, Error> {
39-
for ch in s.as_bytes() {
40-
if *ch < 20 || *ch > 127 {
41-
return Err(Error::Unprintable(*ch));
94+
pub fn verify_checksum(s: &str) -> Result<&str, Error> {
95+
let mut last_hash_pos = s.len();
96+
for (pos, ch) in s.char_indices() {
97+
if !(32..127).contains(&u32::from(ch)) {
98+
return Err(Error::InvalidCharacter { ch, pos });
99+
} else if ch == '#' {
100+
last_hash_pos = pos;
42101
}
43102
}
103+
// After this point we know we have ASCII and can stop using character methods.
104+
105+
if last_hash_pos < s.len() {
106+
let checksum_str = &s[last_hash_pos + 1..];
107+
if checksum_str.len() != CHECKSUM_LENGTH {
108+
return Err(Error::InvalidChecksumLength {
109+
actual: checksum_str.len(),
110+
expected: CHECKSUM_LENGTH,
111+
});
112+
}
113+
114+
let mut eng = Engine::new();
115+
eng.input_unchecked(s[..last_hash_pos].as_bytes());
44116

45-
let mut parts = s.splitn(2, '#');
46-
let desc_str = parts.next().unwrap();
47-
if let Some(checksum_str) = parts.next() {
48-
let expected_sum = desc_checksum(desc_str)?;
49-
if checksum_str != expected_sum {
50-
return Err(Error::BadDescriptor(format!(
51-
"Invalid checksum '{}', expected '{}'",
52-
checksum_str, expected_sum
53-
)));
117+
let expected = eng.checksum_chars();
118+
let mut actual = ['_'; CHECKSUM_LENGTH];
119+
for (act, ch) in actual.iter_mut().zip(checksum_str.chars()) {
120+
*act = ch;
121+
}
122+
123+
if expected != actual {
124+
return Err(Error::InvalidChecksum { actual, expected });
54125
}
55126
}
56-
Ok(desc_str)
127+
Ok(&s[..last_hash_pos])
57128
}
58129

59130
/// An engine to compute a checksum from a string.
@@ -78,16 +149,18 @@ impl Engine {
78149
/// If this function returns an error, the `Engine` will be left in an indeterminate
79150
/// state! It is safe to continue feeding it data but the result will not be meaningful.
80151
pub fn input(&mut self, s: &str) -> Result<(), Error> {
81-
for ch in s.chars() {
82-
let pos = VALID_CHARS
83-
.get(ch as usize)
84-
.ok_or_else(|| {
85-
Error::BadDescriptor(format!("Invalid character in checksum: '{}'", ch))
86-
})?
87-
.ok_or_else(|| {
88-
Error::BadDescriptor(format!("Invalid character in checksum: '{}'", ch))
89-
})? as u64;
152+
for (pos, ch) in s.char_indices() {
153+
if !(32..127).contains(&u32::from(ch)) {
154+
return Err(Error::InvalidCharacter { ch, pos });
155+
}
156+
}
157+
self.input_unchecked(s.as_bytes());
158+
Ok(())
159+
}
90160

161+
fn input_unchecked(&mut self, s: &[u8]) {
162+
for ch in s {
163+
let pos = u64::from(CHAR_MAP[usize::from(*ch) - 32]);
91164
let fe = Fe32::try_from(pos & 31).expect("pos is valid because of the mask");
92165
self.inner.input_fe(fe);
93166

@@ -100,7 +173,6 @@ impl Engine {
100173
self.clscount = 0;
101174
}
102175
}
103-
Ok(())
104176
}
105177

106178
/// Obtains the checksum characters of all the data thus-far fed to the
@@ -192,7 +264,9 @@ mod test {
192264

193265
macro_rules! check_expected {
194266
($desc: expr, $checksum: expr) => {
195-
assert_eq!(desc_checksum($desc).unwrap(), $checksum);
267+
let mut eng = Engine::new();
268+
eng.input_unchecked($desc.as_bytes());
269+
assert_eq!(eng.checksum(), $checksum);
196270
};
197271
}
198272

@@ -229,8 +303,8 @@ mod test {
229303
let invalid_desc = format!("wpkh(tprv8ZgxMBicQKsPdpkqS7Eair4YxjcuuvDPNYmKX3sCniCf16tHEVrjjiSXEkFRnUH77yXc6ZcwHHcL{}fjdi5qUvw3VDfgYiH5mNsj5izuiu2N/1/2/*)", sparkle_heart);
230304

231305
assert_eq!(
232-
desc_checksum(&invalid_desc).err().unwrap().to_string(),
233-
format!("Invalid descriptor: Invalid character in checksum: '{}'", sparkle_heart)
306+
verify_checksum(&invalid_desc).err().unwrap().to_string(),
307+
format!("invalid character '{}' (position 85)", sparkle_heart)
234308
);
235309
}
236310

src/descriptor/mod.rs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ use bitcoin::{
2121
};
2222
use sync::Arc;
2323

24-
use self::checksum::verify_checksum;
2524
use crate::miniscript::decode::Terminal;
2625
use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0};
2726
use crate::plan::{AssetProvider, Plan};
@@ -988,8 +987,7 @@ impl<Pk: FromStrKey> FromStr for Descriptor<Pk> {
988987
let desc = if s.starts_with("tr(") {
989988
Ok(Descriptor::Tr(Tr::from_str(s)?))
990989
} else {
991-
let desc_str = verify_checksum(s)?;
992-
let top = expression::Tree::from_str(desc_str)?;
990+
let top = expression::Tree::from_str(s)?;
993991
expression::FromTree::from_tree(&top)
994992
}?;
995993

@@ -1053,8 +1051,7 @@ mod tests {
10531051
use bitcoin::sighash::EcdsaSighashType;
10541052
use bitcoin::{bip32, PublicKey, Sequence};
10551053

1056-
use super::checksum::desc_checksum;
1057-
use super::*;
1054+
use super::{checksum, *};
10581055
use crate::hex_script;
10591056
#[cfg(feature = "compiler")]
10601057
use crate::policy;
@@ -1066,10 +1063,10 @@ mod tests {
10661063
let desc = Descriptor::<String>::from_str(s).unwrap();
10671064
let output = desc.to_string();
10681065
let normalize_aliases = s.replace("c:pk_k(", "pk(").replace("c:pk_h(", "pkh(");
1069-
assert_eq!(
1070-
format!("{}#{}", &normalize_aliases, desc_checksum(&normalize_aliases).unwrap()),
1071-
output
1072-
);
1066+
1067+
let mut checksum_eng = checksum::Engine::new();
1068+
checksum_eng.input(&normalize_aliases).unwrap();
1069+
assert_eq!(format!("{}#{}", &normalize_aliases, checksum_eng.checksum()), output);
10731070
}
10741071

10751072
#[test]
@@ -1841,7 +1838,7 @@ mod tests {
18411838
($secp: ident,$($desc: expr),*) => {
18421839
$(
18431840
match Descriptor::parse_descriptor($secp, $desc) {
1844-
Err(Error::BadDescriptor(_)) => {},
1841+
Err(Error::ParseTree(crate::ParseTreeError::Checksum(_))) => {},
18451842
Err(e) => panic!("Expected bad checksum for {}, got '{}'", $desc, e),
18461843
_ => panic!("Invalid checksum treated as valid: {}", $desc),
18471844
};

src/descriptor/segwitv0.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use core::fmt;
1010

1111
use bitcoin::{Address, Network, ScriptBuf, Weight};
1212

13-
use super::checksum::verify_checksum;
1413
use super::SortedMultiVec;
1514
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
1615
use crate::expression::{self, FromTree};
@@ -288,8 +287,7 @@ impl<Pk: MiniscriptKey> fmt::Display for Wsh<Pk> {
288287
impl<Pk: FromStrKey> core::str::FromStr for Wsh<Pk> {
289288
type Err = Error;
290289
fn from_str(s: &str) -> Result<Self, Self::Err> {
291-
let desc_str = verify_checksum(s)?;
292-
let top = expression::Tree::from_str(desc_str)?;
290+
let top = expression::Tree::from_str(s)?;
293291
Wsh::<Pk>::from_tree(&top)
294292
}
295293
}
@@ -505,8 +503,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Wpkh<Pk> {
505503
impl<Pk: FromStrKey> core::str::FromStr for Wpkh<Pk> {
506504
type Err = Error;
507505
fn from_str(s: &str) -> Result<Self, Self::Err> {
508-
let desc_str = verify_checksum(s)?;
509-
let top = expression::Tree::from_str(desc_str)?;
506+
let top = expression::Tree::from_str(s)?;
510507
Self::from_tree(&top)
511508
}
512509
}

src/descriptor/sh.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use core::fmt;
1313
use bitcoin::script::PushBytes;
1414
use bitcoin::{script, Address, Network, ScriptBuf, Weight};
1515

16-
use super::checksum::verify_checksum;
1716
use super::{SortedMultiVec, Wpkh, Wsh};
1817
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
1918
use crate::expression::{self, FromTree};
@@ -109,8 +108,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Sh<Pk> {
109108
impl<Pk: FromStrKey> core::str::FromStr for Sh<Pk> {
110109
type Err = Error;
111110
fn from_str(s: &str) -> Result<Self, Self::Err> {
112-
let desc_str = verify_checksum(s)?;
113-
let top = expression::Tree::from_str(desc_str)?;
111+
let top = expression::Tree::from_str(s)?;
114112
Self::from_tree(&top)
115113
}
116114
}

src/descriptor/tr.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,9 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Tr<Pk> {
557557
impl<Pk: FromStrKey> core::str::FromStr for Tr<Pk> {
558558
type Err = Error;
559559
fn from_str(s: &str) -> Result<Self, Self::Err> {
560-
let desc_str = verify_checksum(s)?;
560+
let desc_str = verify_checksum(s)
561+
.map_err(From::from)
562+
.map_err(Error::ParseTree)?;
561563
let top = parse_tr_tree(desc_str)?;
562564
Self::from_tree(&top)
563565
}
@@ -587,8 +589,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Tr<Pk> {
587589

588590
// Helper function to parse string into miniscript tree form
589591
fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
590-
expression::check_valid_chars(s)?;
591-
592592
if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' {
593593
let rest = &s[3..s.len() - 1];
594594
if !rest.contains(',') {

0 commit comments

Comments
 (0)