8
8
import functools
9
9
import logging
10
10
import os
11
+ import re
11
12
import types
12
13
import unicodedata
13
14
@@ -1648,7 +1649,8 @@ class _MathStyle(enum.Enum):
1648
1649
\cdot \bigtriangledown \bigcirc
1649
1650
\cap \triangleleft \dagger
1650
1651
\cup \triangleright \ddagger
1651
- \uplus \lhd \amalg''' .split ())
1652
+ \uplus \lhd \amalg
1653
+ \dotplus \dotminus''' .split ())
1652
1654
1653
1655
_relation_symbols = set (r'''
1654
1656
= < > :
@@ -1661,7 +1663,7 @@ class _MathStyle(enum.Enum):
1661
1663
\sqsubset \sqsupset \neq \smile
1662
1664
\sqsubseteq \sqsupseteq \doteq \frown
1663
1665
\in \ni \propto \vdash
1664
- \dashv \dots \dotplus \ doteqdot''' .split ())
1666
+ \dashv \dots \doteqdot''' .split ())
1665
1667
1666
1668
_arrow_symbols = set (r'''
1667
1669
\leftarrow \longleftarrow \uparrow
@@ -1717,24 +1719,36 @@ def set_names_and_parse_actions():
1717
1719
1718
1720
# Root definitions.
1719
1721
1722
+ # In TeX parlance, a csname is a control sequence name (a "\foo").
1723
def csnames(group, names):
    """
    Build a Regex element matching a backslash followed by one of *names*.

    Parameters
    ----------
    group : str
        Name of the regex group capturing the matched name (without the
        leading backslash).
    names : iterable of str
        Non-empty control sequence names, given without the backslash.

    Returns
    -------
    Regex
        Element whose pattern is ``\\(?P<group>...)``.
    """
    ends_with_alpha = []
    ends_with_nonalpha = []
    for name in names:
        if name[-1].isalpha():
            ends_with_alpha.append(name)
        else:
            ends_with_nonalpha.append(name)
    alternatives = []
    if ends_with_alpha:
        # A name ending in a letter must not be directly followed by another
        # letter, otherwise it is only a prefix of a longer control sequence.
        alternatives.append("(?:{})(?![A-Za-z])".format(
            "|".join(map(re.escape, ends_with_alpha))))
    # Names ending in a non-letter need no lookahead.  They are joined
    # without any padding: whitespace inside the pattern is matched literally.
    alternatives.extend(map(re.escape, ends_with_nonalpha))
    # "(?!)" never matches; it keeps an empty *names* from matching a bare
    # backslash with an empty name.
    return Regex(r"\\(?P<{}>{})".format(
        group, "|".join(alternatives) or "(?!)"))
1736
+
1720
1737
p .float_literal = Regex (r"[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)" )
1721
1738
p .space = oneOf (self ._space_widths )("space" )
1722
1739
1723
1740
p .style_literal = oneOf (
1724
1741
[str (e .value ) for e in self ._MathStyle ])("style_literal" )
1725
1742
1726
- p .single_symbol = Regex (
1727
- r"([a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|%s])|(\\[%%${}\[\]_|])" %
1728
- "\U00000080 -\U0001ffff " # unicode range
1729
- )("sym" )
1730
- p .accentprefixed = "\\ " + oneOf (self ._accentprefixed )("sym" )
1731
- p .symbol_name = (
1732
- oneOf ([rf"\{ sym } " for sym in tex2uni ])("sym" )
1733
- + Regex ("(?=[^A-Za-z]|$)" ).leaveWhitespace ())
1734
- p .symbol = (p .single_symbol | p .symbol_name ).leaveWhitespace ()
1743
+ p .symbol = Regex (
1744
+ r"[a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|\U00000080-\U0001ffff]"
1745
+ r"|\\[%${}\[\]_|]"
1746
+ + r"|\\(?:{})(?![A-Za-z])" .format (
1747
+ "|" .join (map (re .escape , tex2uni )))
1748
+ )("sym" ).leaveWhitespace ()
1735
1749
p .unknown_symbol = Regex (r"\\[A-Za-z]*" )("name" )
1736
1750
1737
- p .font = " \\ " + oneOf ( self ._fontnames )( "font" )
1751
+ p .font = csnames ( "font" , self ._fontnames )
1738
1752
p .start_group = (
1739
1753
Optional (r"\math" + oneOf (self ._fontnames )("font" )) + "{" )
1740
1754
p .end_group = Literal ("}" )
@@ -1771,11 +1785,10 @@ def set_names_and_parse_actions():
1771
1785
p .customspace <<= cmd (r"\hspace" , "{" + p .float_literal ("space" ) + "}" )
1772
1786
1773
1787
p .accent <<= (
1774
- "\\ "
1775
- + oneOf ([* self ._accent_map , * self ._wide_accents ])("accent" )
1788
+ csnames ("accent" , [* self ._accent_map , * self ._wide_accents ])
1776
1789
- p .placeable ("sym" ))
1777
1790
1778
- p .function <<= " \\ " + oneOf ( self ._function_names )( "name" )
1791
+ p .function <<= csnames ( "name" , self ._function_names )
1779
1792
p .operatorname <<= cmd (
1780
1793
r"\operatorname" ,
1781
1794
"{" + ZeroOrMore (p .simple | p .unknown_symbol )("name" ) + "}" )
@@ -1816,10 +1829,8 @@ def set_names_and_parse_actions():
1816
1829
p .optional_group ("annotation" ) + p .optional_group ("body" ))
1817
1830
1818
1831
p .placeable <<= (
1819
- p .accentprefixed # Must be before accent so named symbols that are
1820
- # prefixed with an accent name work
1821
- | p .accent # Must be before symbol as all accents are symbols
1822
- | p .symbol # Must be third to catch all named symbols and single
1832
+ p .accent # Must be before symbol as all accents are symbols
1833
+ | p .symbol # Must be second to catch all named symbols and single
1823
1834
# chars not in a group
1824
1835
| p .function
1825
1836
| p .operatorname
@@ -2019,8 +2030,6 @@ def symbol(self, s, loc, toks):
2019
2030
return [Hlist ([char , self ._make_space (0.2 )], do_kern = True )]
2020
2031
return [char ]
2021
2032
2022
- accentprefixed = symbol
2023
-
2024
2033
def unknown_symbol(self, s, loc, toks):
    """
    Parse action that rejects an unrecognized control sequence.

    Always raises `ParseFatalException` at position *loc* of the input *s*,
    naming the offending token taken from ``toks['name']``.
    """
    # No trailing whitespace after the symbol name in the message.
    raise ParseFatalException(s, loc, f"Unknown symbol: {toks['name']}")
2026
2035
@@ -2049,12 +2058,6 @@ def unknown_symbol(self, s, loc, toks):
2049
2058
2050
2059
_wide_accents = set (r"widehat widetilde widebar" .split ())
2051
2060
2052
- # make a lambda and call it to get the namespace right
2053
- _accentprefixed = (lambda am : [
2054
- p for p in tex2uni
2055
- if any (p .startswith (a ) and a != p for a in am )
2056
- ])(set (_accent_map ))
2057
-
2058
2061
def accent (self , s , loc , toks ):
2059
2062
state = self .get_state ()
2060
2063
thickness = state .get_current_underline_thickness ()
0 commit comments