@@ -595,6 +595,150 @@ def _code(p, flags):
595
595
596
596
return code
597
597
598
def _hex_code(code):
    """Return the code words in *code* as a bracketed, comma-separated
    list of zero-padded hexadecimal literals.
    """
    # Two hex digits per byte of a code word, plus two for the '0x' prefix.
    width = _sre.CODESIZE * 2 + 2
    words = ['%#0*x' % (width, word) for word in code]
    return '[%s]' % ', '.join(words)
600
+
601
def dis(code):
    """Print a human-readable listing of compiled SRE *code* to stdout.

    *code* is the flat sequence of integer code words to decode.  Each
    instruction is printed on its own line as ``<offset><mark> <operands>``;
    the mark is ``:`` when that offset was the target of a skip/jump already
    printed earlier in the listing and ``.`` otherwise.  Nested sub-programs
    (set bodies, branch alternatives, repeat and assertion bodies) are
    decoded recursively and indented two columns per nesting level.

    Raises ValueError if an opcode is not one this function knows how to
    decode.
    """
    import sys

    labels = set()      # offsets named as a '(to N)' target so far
    level = 0           # current nesting depth; shared by all helpers
    offset_width = len(str(len(code) - 1))
    # Number of code words read for one CHARSET bitmap / BIGCHARSET chunk.
    charset_len = 256 // _CODEBITS
    # Number of code words read for the 256-byte BIGCHARSET mapping.
    mapping_len = 256 // _sre.CODESIZE

    def dis_(start, end):
        """Decode and print the instructions in code[start:end]."""
        nonlocal level

        def print_(*args, to=None):
            # Print one instruction line, prefixed with the offset of the
            # instruction currently being decoded (the enclosing `start`).
            if to is not None:
                labels.add(to)
                args += ('(to %d)' % (to,),)
            marker = ':' if start in labels else '.'
            # Two columns per nesting level, matching print_2's 2 * level,
            # so that continuation lines line up with instruction lines.
            print('%*d%s ' % (offset_width, start, marker),
                  end='  ' * (level - 1))
            print(*args)

        def print_2(*args):
            # Print a continuation line: no offset column, only padding.
            print(end=' ' * (offset_width + 2 * level))
            print(*args)

        level += 1
        i = start
        while i < end:
            # `start` is read by print_ through the closure, so it must
            # track the offset of the instruction being decoded.
            start = i
            op = code[i]
            i += 1
            op = OPCODES[op]
            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
                      MAX_UNTIL, MIN_UNTIL, NEGATE):
                print_(op)
            elif op in (LITERAL, NOT_LITERAL,
                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
                arg = code[i]
                i += 1
                print_(op, '%#02x (%r)' % (arg, chr(arg)))
            elif op is AT:
                arg = code[i]
                i += 1
                arg = str(ATCODES[arg])
                assert arg[:3] == 'AT_'
                print_(op, arg[3:])
            elif op is CATEGORY:
                arg = code[i]
                i += 1
                arg = str(CHCODES[arg])
                assert arg[:9] == 'CATEGORY_'
                print_(op, arg[9:])
            elif op in (IN, IN_IGNORE, IN_LOC_IGNORE):
                skip = code[i]
                print_(op, skip, to=i + skip)
                dis_(i + 1, i + skip)
                i += skip
            elif op in (RANGE, RANGE_IGNORE):
                lo, hi = code[i: i + 2]
                i += 2
                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
            elif op is CHARSET:
                print_(op, _hex_code(code[i: i + charset_len]))
                i += charset_len
            elif op is BIGCHARSET:
                arg = code[i]
                i += 1
                # Re-split the mapping's code words into individual bytes.
                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
                                        for x in code[i: i + mapping_len]))
                print_(op, arg, mapping)
                i += mapping_len
                level += 1
                # `arg` chunks follow, each printed on its own line.
                for _ in range(arg):
                    print_2(_hex_code(code[i: i + charset_len]))
                    i += charset_len
                level -= 1
            elif op in (MARK, GROUPREF, GROUPREF_IGNORE):
                arg = code[i]
                i += 1
                print_(op, arg)
            elif op is JUMP:
                skip = code[i]
                print_(op, skip, to=i + skip)
                i += 1
            elif op is BRANCH:
                skip = code[i]
                print_(op, skip, to=i + skip)
                # Each alternative starts with a skip word; a zero skip
                # ends the chain.
                while skip:
                    dis_(i + 1, i + skip)
                    i += skip
                    start = i
                    skip = code[i]
                    if skip:
                        print_('branch', skip, to=i + skip)
                    else:
                        print_(FAILURE)
                i += 1
            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE):
                skip, min_, max_ = code[i: i + 3]
                if max_ == MAXREPEAT:
                    max_ = 'MAXREPEAT'
                print_(op, skip, min_, max_, to=i + skip)
                dis_(i + 3, i + skip)
                i += skip
            elif op is GROUPREF_EXISTS:
                arg, skip = code[i: i + 2]
                print_(op, arg, skip, to=i + skip)
                i += 2
            elif op in (ASSERT, ASSERT_NOT):
                skip, arg = code[i: i + 2]
                print_(op, skip, arg, to=i + skip)
                dis_(i + 2, i + skip)
                i += skip
            elif op is INFO:
                skip, flags, min_, max_ = code[i: i + 4]
                if max_ == MAXREPEAT:
                    max_ = 'MAXREPEAT'
                print_(op, skip, bin(flags), min_, max_, to=i + skip)
                # Position of the optional data that follows the four
                # fixed INFO words.
                cursor = i + 4
                if flags & SRE_INFO_PREFIX:
                    prefix_len, prefix_skip = code[i + 4: i + 6]
                    # The two-space lead puts these items in the same
                    # column as the 'in' line printed one level deeper.
                    print_2('  prefix_skip', prefix_skip)
                    cursor = i + 6
                    prefix = code[cursor: cursor + prefix_len]
                    print_2('  prefix',
                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
                            '(%r)' % ''.join(map(chr, prefix)))
                    cursor += prefix_len
                    print_2('  overlap', code[cursor: cursor + prefix_len])
                    cursor += prefix_len
                if flags & SRE_INFO_CHARSET:
                    level += 1
                    print_2('in')
                    dis_(cursor, i + skip)
                    level -= 1
                i += skip
            else:
                raise ValueError(op)

        level -= 1

    dis_(0, len(code))
740
+
741
+
598
742
def compile (p , flags = 0 ):
599
743
# internal: convert pattern list to internal format
600
744
@@ -606,7 +750,9 @@ def compile(p, flags=0):
606
750
607
751
code = _code (p , flags )
608
752
609
- # print(code)
753
+ if flags & SRE_FLAG_DEBUG :
754
+ print ()
755
+ dis (code )
610
756
611
757
# map in either direction
612
758
groupindex = p .pattern .groupdict
0 commit comments