@@ -8,7 +8,7 @@ mod error;
8
8
use core:: fmt;
9
9
use core:: str:: FromStr ;
10
10
11
- pub use self :: error:: ParseThresholdError ;
11
+ pub use self :: error:: { ParseThresholdError , ParseTreeError } ;
12
12
use crate :: prelude:: * ;
13
13
use crate :: { errstr, Error , Threshold , MAX_RECURSION_DEPTH } ;
14
14
@@ -145,13 +145,107 @@ impl<'a> Tree<'a> {
145
145
Self :: from_slice_delim ( sl, 0u32 , '(' )
146
146
}
147
147
148
+ fn parse_pre_check ( s : & str , open : u8 , close : u8 ) -> Result < ( ) , ParseTreeError > {
149
+ // First, scan through string to make sure it is well-formed.
150
+ let mut max_depth = 0 ;
151
+ // Do ASCII check first; after this we can use .bytes().enumerate() rather
152
+ // than .char_indices(), which is *significantly* faster.
153
+ for ( pos, ch) in s. char_indices ( ) {
154
+ if !( 32 ..128 ) . contains ( & u32:: from ( ch) ) {
155
+ return Err ( ParseTreeError :: InvalidCharacter { ch, pos } ) ;
156
+ }
157
+ }
158
+
159
+ let mut open_paren_stack = Vec :: with_capacity ( 128 ) ;
160
+
161
+ for ( pos, ch) in s. bytes ( ) . enumerate ( ) {
162
+ if ch == open {
163
+ open_paren_stack. push ( ( ch, pos) ) ;
164
+ if max_depth < open_paren_stack. len ( ) {
165
+ max_depth = open_paren_stack. len ( ) ;
166
+ }
167
+ } else if ch == close {
168
+ if let Some ( ( open_ch, open_pos) ) = open_paren_stack. pop ( ) {
169
+ if ( open_ch == b'(' && ch == b'}' ) || ( open_ch == b'{' && ch == b')' ) {
170
+ return Err ( ParseTreeError :: MismatchedParens {
171
+ open_ch : open_ch. into ( ) ,
172
+ open_pos,
173
+ close_ch : ch. into ( ) ,
174
+ close_pos : pos,
175
+ } ) ;
176
+ }
177
+
178
+ if let Some ( & ( paren_ch, paren_pos) ) = open_paren_stack. last ( ) {
179
+ // not last paren; this should not be the end of the string,
180
+ // and the next character should be a , ) or }.
181
+ if pos == s. len ( ) - 1 {
182
+ return Err ( ParseTreeError :: UnmatchedOpenParen {
183
+ ch : paren_ch. into ( ) ,
184
+ pos : paren_pos,
185
+ } ) ;
186
+ } else {
187
+ let next_byte = s. as_bytes ( ) [ pos + 1 ] ;
188
+ if next_byte != b')' && next_byte != b'}' && next_byte != b',' {
189
+ return Err ( ParseTreeError :: ExpectedParenOrComma {
190
+ ch : next_byte. into ( ) ,
191
+ pos : pos + 1 ,
192
+ } ) ;
193
+ //
194
+ }
195
+ }
196
+ } else {
197
+ // last paren; this SHOULD be the end of the string
198
+ if pos < s. len ( ) - 1 {
199
+ return Err ( ParseTreeError :: TrailingCharacter {
200
+ ch : s. as_bytes ( ) [ pos + 1 ] . into ( ) ,
201
+ pos : pos + 1 ,
202
+ } ) ;
203
+ }
204
+ }
205
+ } else {
206
+ // In practice, this is only hit if there are no open parens at all.
207
+ // If there are open parens, like in "())", then on the first ), we
208
+ // would have returned TrailingCharacter in the previous clause.
209
+ //
210
+ // From a user point of view, UnmatchedCloseParen would probably be
211
+ // a clearer error to get, but it complicates the parser to do this,
212
+ // and "TralingCharacter" is technically correct, so we leave it for
213
+ // now.
214
+ return Err ( ParseTreeError :: UnmatchedCloseParen { ch : ch. into ( ) , pos } ) ;
215
+ }
216
+ } else if ch == b',' && open_paren_stack. is_empty ( ) {
217
+ // We consider commas outside of the tree to be "trailing characters"
218
+ return Err ( ParseTreeError :: TrailingCharacter { ch : ch. into ( ) , pos } ) ;
219
+ }
220
+ }
221
+ // Catch "early end of string"
222
+ if let Some ( ( ch, pos) ) = open_paren_stack. pop ( ) {
223
+ return Err ( ParseTreeError :: UnmatchedOpenParen { ch : ch. into ( ) , pos } ) ;
224
+ }
225
+
226
+ // FIXME should be able to remove this once we eliminate all recursion
227
+ // in the library.
228
+ if u32:: try_from ( max_depth) . unwrap_or ( u32:: MAX ) > MAX_RECURSION_DEPTH {
229
+ return Err ( ParseTreeError :: MaxRecursionDepthExceeded {
230
+ actual : max_depth,
231
+ maximum : MAX_RECURSION_DEPTH ,
232
+ } ) ;
233
+ }
234
+
235
+ Ok ( ( ) )
236
+ }
237
+
148
238
pub ( crate ) fn from_slice_delim (
149
239
mut sl : & ' a str ,
150
240
depth : u32 ,
151
241
delim : char ,
152
242
) -> Result < ( Tree < ' a > , & ' a str ) , Error > {
153
- if depth >= MAX_RECURSION_DEPTH {
154
- return Err ( Error :: MaxRecursiveDepthExceeded ) ;
243
+ if depth == 0 {
244
+ if delim == '{' {
245
+ Self :: parse_pre_check ( sl, b'{' , b'}' ) . map_err ( Error :: ParseTree ) ?;
246
+ } else {
247
+ Self :: parse_pre_check ( sl, b'(' , b')' ) . map_err ( Error :: ParseTree ) ?;
248
+ }
155
249
}
156
250
157
251
match next_expr ( sl, delim) {
@@ -171,7 +265,7 @@ impl<'a> Tree<'a> {
171
265
ret. args . push ( arg) ;
172
266
173
267
if new_sl. is_empty ( ) {
174
- return Err ( Error :: ExpectedChar ( closing_delim ( delim ) ) ) ;
268
+ unreachable ! ( )
175
269
}
176
270
177
271
sl = & new_sl[ 1 ..] ;
@@ -181,7 +275,7 @@ impl<'a> Tree<'a> {
181
275
if last_byte == closing_delim ( delim) as u8 {
182
276
break ;
183
277
} else {
184
- return Err ( Error :: ExpectedChar ( closing_delim ( delim ) ) ) ;
278
+ unreachable ! ( )
185
279
}
186
280
}
187
281
}
@@ -200,7 +294,7 @@ impl<'a> Tree<'a> {
200
294
if rem. is_empty ( ) {
201
295
Ok ( top)
202
296
} else {
203
- Err ( errstr ( rem ) )
297
+ unreachable ! ( )
204
298
}
205
299
}
206
300
@@ -337,36 +431,88 @@ mod tests {
337
431
fn parse_tree_basic ( ) {
338
432
assert_eq ! ( Tree :: from_str( "thresh" ) . unwrap( ) , leaf( "thresh" ) ) ;
339
433
340
- assert ! ( matches!( Tree :: from_str( "thresh," ) , Err ( Error :: Unexpected ( s) ) if s == "," ) ) ;
434
+ assert ! ( matches!(
435
+ Tree :: from_str( "thresh," ) . unwrap_err( ) ,
436
+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: ',' , pos: 6 } ) ,
437
+ ) ) ;
341
438
342
439
assert ! ( matches!(
343
- Tree :: from_str( "thresh,thresh" ) ,
344
- Err ( Error :: Unexpected ( s ) ) if s == ",thresh" ,
440
+ Tree :: from_str( "thresh,thresh" ) . unwrap_err ( ) ,
441
+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch : ',' , pos : 6 } ) ,
345
442
) ) ;
346
443
347
444
assert ! ( matches!(
348
- Tree :: from_str( "thresh()thresh()" ) ,
349
- Err ( Error :: Unexpected ( s ) ) if s == "thresh()" ,
445
+ Tree :: from_str( "thresh()thresh()" ) . unwrap_err ( ) ,
446
+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch : 't' , pos : 8 } ) ,
350
447
) ) ;
351
448
352
449
assert_eq ! ( Tree :: from_str( "thresh()" ) . unwrap( ) , paren_node( "thresh" , vec![ leaf( "" ) ] ) ) ;
353
450
354
- // FIXME even for our current crappy error handling, this one is pretty bad
355
- assert ! ( matches!( Tree :: from_str( "thresh(a()b)" ) , Err ( Error :: ExpectedChar ( ')' ) ) ) ) ;
451
+ assert ! ( matches!(
452
+ Tree :: from_str( "thresh(a()b)" ) ,
453
+ Err ( Error :: ParseTree ( ParseTreeError :: ExpectedParenOrComma { ch: 'b' , pos: 10 } ) ) ,
454
+ ) ) ;
356
455
357
- assert ! ( matches!( Tree :: from_str( "thresh()xyz" ) , Err ( Error :: Unexpected ( s) ) if s == "xyz" ) ) ;
456
+ assert ! ( matches!(
457
+ Tree :: from_str( "thresh()xyz" ) ,
458
+ Err ( Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: 'x' , pos: 8 } ) ) ,
459
+ ) ) ;
358
460
}
359
461
360
462
#[test]
fn parse_tree_parens() {
    // Input ends while a paren is still open.
    let never_closed = Tree::from_str("a(").unwrap_err();
    assert!(matches!(
        never_closed,
        Error::ParseTree(ParseTreeError::UnmatchedOpenParen { ch: '(', pos: 1 }),
    ));

    // A closing paren with nothing open before it.
    let only_close = Tree::from_str(")").unwrap_err();
    assert!(matches!(
        only_close,
        Error::ParseTree(ParseTreeError::UnmatchedCloseParen { ch: ')', pos: 0 }),
    ));

    // An extra closing paren after the outermost group is a trailing character.
    let extra_close = Tree::from_str("x(y))").unwrap_err();
    assert!(matches!(
        extra_close,
        Error::ParseTree(ParseTreeError::TrailingCharacter { ch: ')', pos: 4 }),
    ));

    /* Will be enabled in a later PR which unifies TR and non-TR parsing.
    assert!(matches!(
        Tree::from_str("a{").unwrap_err(),
        Error::ParseTree(ParseTreeError::UnmatchedOpenParen { ch: '{', pos: 1 }),
    ));

    assert!(matches!(
        Tree::from_str("}").unwrap_err(),
        Error::ParseTree(ParseTreeError::UnmatchedCloseParen { ch: '}', pos: 0 }),
    ));
    */

    // A stray `}` after the outermost group is likewise a trailing character.
    let extra_brace = Tree::from_str("x(y)}").unwrap_err();
    assert!(matches!(
        extra_brace,
        Error::ParseTree(ParseTreeError::TrailingCharacter { ch: '}', pos: 4 }),
    ));

    /* Will be enabled in a later PR which unifies TR and non-TR parsing.
    assert!(matches!(
        Tree::from_str("x{y)").unwrap_err(),
        Error::ParseTree(ParseTreeError::MismatchedParens {
            open_ch: '{',
            open_pos: 1,
            close_ch: ')',
            close_pos: 3,
        }),
    ));
    */
}
367
508
368
- // In next commit will add tests related to {}s; currently we ignore
369
- // these except in Taproot mode.
509
#[test]
fn parse_tree_taproot() {
    // This test will change in a later PR which unifies TR and non-TR parsing.
    // For now the `,` following `b(c)` is reported as a trailing character.
    let braces = Tree::from_str("a{b(c),d}").unwrap_err();
    assert!(matches!(
        braces,
        Error::ParseTree(ParseTreeError::TrailingCharacter { ch: ',', pos: 6 }),
    ));
}
371
517
372
518
#[ test]
0 commit comments