1
1
import io
2
+ import itertools
2
3
import shlex
3
4
import string
4
5
import unittest
@@ -183,10 +184,12 @@ def testSyntaxSplitAmpersandAndPipe(self):
183
184
src = ['echo hi %s echo bye' % delimiter ,
184
185
'echo hi%secho bye' % delimiter ]
185
186
ref = ['echo' , 'hi' , delimiter , 'echo' , 'bye' ]
186
- for ss in src :
187
+ for ss , ws in itertools . product ( src , ( False , True )) :
187
188
s = shlex .shlex (ss , punctuation_chars = True )
189
+ s .whitespace_split = ws
188
190
result = list (s )
189
- self .assertEqual (ref , result , "While splitting '%s'" % ss )
191
+ self .assertEqual (ref , result ,
192
+ "While splitting '%s' [ws=%s]" % (ss , ws ))
190
193
191
194
def testSyntaxSplitSemicolon (self ):
192
195
"""Test handling of syntax splitting of ;"""
@@ -197,10 +200,12 @@ def testSyntaxSplitSemicolon(self):
197
200
'echo hi%s echo bye' % delimiter ,
198
201
'echo hi%secho bye' % delimiter ]
199
202
ref = ['echo' , 'hi' , delimiter , 'echo' , 'bye' ]
200
- for ss in src :
203
+ for ss , ws in itertools . product ( src , ( False , True )) :
201
204
s = shlex .shlex (ss , punctuation_chars = True )
205
+ s .whitespace_split = ws
202
206
result = list (s )
203
- self .assertEqual (ref , result , "While splitting '%s'" % ss )
207
+ self .assertEqual (ref , result ,
208
+ "While splitting '%s' [ws=%s]" % (ss , ws ))
204
209
205
210
def testSyntaxSplitRedirect (self ):
206
211
"""Test handling of syntax splitting of >"""
@@ -211,29 +216,37 @@ def testSyntaxSplitRedirect(self):
211
216
'echo hi%s out' % delimiter ,
212
217
'echo hi%sout' % delimiter ]
213
218
ref = ['echo' , 'hi' , delimiter , 'out' ]
214
# Run every source spelling under both whitespace_split settings; each
# combination must yield the same reference token list.
for ss, ws in itertools.product(src, (False, True)):
    s = shlex.shlex(ss, punctuation_chars=True)
    # Apply the mode under test. Without this assignment the lexer keeps
    # its default setting, so the ws=True iteration would repeat the
    # ws=False case while the failure message claims otherwise.
    s.whitespace_split = ws
    result = list(s)
    self.assertEqual(ref, result,
                     "While splitting '%s' [ws=%s]" % (ss, ws))
218
224
219
225
def testSyntaxSplitParen (self ):
220
226
"""Test handling of syntax splitting of ()"""
221
227
# these should all parse to the same output
222
228
src = ['( echo hi )' ,
223
229
'(echo hi)' ]
224
230
ref = ['(' , 'echo' , 'hi' , ')' ]
225
- for ss in src :
231
+ for ss , ws in itertools . product ( src , ( False , True )) :
226
232
s = shlex .shlex (ss , punctuation_chars = True )
233
+ s .whitespace_split = ws
227
234
result = list (s )
228
- self .assertEqual (ref , result , "While splitting '%s'" % ss )
235
+ self .assertEqual (ref , result ,
236
+ "While splitting '%s' [ws=%s]" % (ss , ws ))
229
237
230
238
def testSyntaxSplitCustom (self ):
231
239
"""Test handling of syntax splitting with custom chars"""
240
+ ss = "~/a&&b-c --color=auto||d *.py?"
232
241
ref = ['~/a' , '&' , '&' , 'b-c' , '--color=auto' , '||' , 'd' , '*.py?' ]
233
- ss = "~/a && b-c --color=auto || d *.py?"
234
242
s = shlex .shlex (ss , punctuation_chars = "|" )
235
243
result = list (s )
236
- self .assertEqual (ref , result , "While splitting '%s'" % ss )
244
+ self .assertEqual (ref , result , "While splitting '%s' [ws=False]" % ss )
245
+ ref = ['~/a&&b-c' , '--color=auto' , '||' , 'd' , '*.py?' ]
246
+ s = shlex .shlex (ss , punctuation_chars = "|" )
247
+ s .whitespace_split = True
248
+ result = list (s )
249
+ self .assertEqual (ref , result , "While splitting '%s' [ws=True]" % ss )
237
250
238
251
def testTokenTypes (self ):
239
252
"""Test that tokens are split with types as expected."""
@@ -293,6 +306,19 @@ def testEmptyStringHandling(self):
293
306
s = shlex .shlex ("'')abc" , punctuation_chars = True )
294
307
self .assertEqual (list (s ), expected )
295
308
309
+ def testUnicodeHandling (self ):
310
+ """Test punctuation_chars and whitespace_split handle unicode."""
311
+ ss = "\u2119 \u01b4 \u2602 \u210c \u00f8 \u1f24 "
312
+ # Should be parsed as one complete token (whitespace_split=True).
313
+ ref = ['\u2119 \u01b4 \u2602 \u210c \u00f8 \u1f24 ' ]
314
+ s = shlex .shlex (ss , punctuation_chars = True )
315
+ s .whitespace_split = True
316
+ self .assertEqual (list (s ), ref )
317
+ # Without whitespace_split, uses wordchars and splits on all.
318
+ ref = ['\u2119 ' , '\u01b4 ' , '\u2602 ' , '\u210c ' , '\u00f8 ' , '\u1f24 ' ]
319
+ s = shlex .shlex (ss , punctuation_chars = True )
320
+ self .assertEqual (list (s ), ref )
321
+
296
322
def testQuote (self ):
297
323
safeunquoted = string .ascii_letters + string .digits + '@%_-+=:,./'
298
324
unicode_sample = '\xe9 \xe0 \xdf ' # e + acute accent, a + grave, sharp s
0 commit comments