Skip to content

Commit ea12bf4

Browse files
authored
Windows shlex fix (#2895)
1 parent 8736549 commit ea12bf4

File tree

6 files changed

+208
-16
lines changed

6 files changed

+208
-16
lines changed

docs/changelog/2635.bugfix.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
When parsing command lines, use ``shlex(..., posix=True)``, even on windows platforms, since non-POSIX mode does not
2+
handle escape characters and quoting like a shell would. This improves cross-platform configurations without hacks or
3+
esoteric quoting.
4+
5+
To make this transition easier, on Windows, the backslash path separator will not treated as an escape character unless
6+
it preceeds a quote, whitespace, or another backslash chracter. This allows paths to mostly be written in single or
7+
double backslash style.
8+
9+
Note that **double-backslash will no longer be escaped to a single backslash in substitutions**, instead the double
10+
backslash will be consumed as part of command splitting, on either posix or windows platforms.
11+
12+
In some instances superfluous double or single quote characters may be stripped from arg arrays in ways that do not
13+
occur in the default windows ``cmd.exe`` shell - by :user:`masenf`.

docs/config.rst

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,16 @@ Run
431431
that outside of it. Therefore ``python`` translates as the virtual environments ``python`` (having the same
432432
runtime version as the :ref:`base_python`), and ``pip`` translates as the virtual environments ``pip``.
433433

434+
.. note::
435+
436+
``shlex`` POSIX-mode quoting rules are used to split the command line into arguments on all
437+
supported platforms as of tox 4.4.0.
438+
439+
The backslash ``\`` character can be used to escape quotes, whitespace, itself, and
440+
other characters (except on Windows, where a backslash in a path will not be interpreted as an escape).
441+
Unescaped single quote will disable the backslash escape until closed by another unescaped single quote.
442+
For more details, please see :doc:`shlex parsing rules <python:library/shlex>`.
443+
434444
.. note::
435445

436446
Inline scripts can be used, however note these are discovered from the project root directory, and is not
@@ -795,14 +805,18 @@ through the ``{...}`` string-substitution pattern.
795805

796806
The string inside the curly braces may reference a global or per-environment config key as described above.
797807

798-
The backslash character ``\`` will act as an escape for a following: ``\``,
808+
In substitutions, the backslash character ``\`` will act as an escape when preceeding
799809
``{``, ``}``, ``:``, ``[``, or ``]``, otherwise the backslash will be
800810
reproduced literally::
801811

802812
commands =
803813
python -c 'print("\{posargs} = \{}".format("{posargs}"))'
804814
python -c 'print("host: \{}".format("{env:HOSTNAME:host\: not set}")'
805815

816+
Note that any backslashes remaining after substitution may be processed by ``shlex`` during command parsing. On POSIX
817+
platforms, the backslash will escape any following character; on windows, the backslash will escape any following quote,
818+
whitespace, or backslash character (since it normally acts as a path delimiter).
819+
806820
Special substitutions that accept additional colon-delimited ``:`` parameters
807821
cannot have a space after the ``:`` at the beginning of line (e.g. ``{posargs:
808822
magic}`` would be parsed as factorial ``{posargs``, having value magic).

src/tox/config/loader/ini/replace.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
ARG_DELIMITER = ":"
3131
REPLACE_START = "{"
3232
REPLACE_END = "}"
33-
BACKSLASH_ESCAPE_CHARS = ["\\", ARG_DELIMITER, REPLACE_START, REPLACE_END, "[", "]"]
33+
BACKSLASH_ESCAPE_CHARS = [ARG_DELIMITER, REPLACE_START, REPLACE_END, "[", "]"]
3434
MAX_REPLACE_DEPTH = 100
3535

3636

@@ -115,11 +115,18 @@ def parse_and_split_to_terminator(
115115
pos = 0
116116

117117
while pos < len(value):
118-
if len(value) > pos + 1 and value[pos] == "\\" and value[pos + 1] in BACKSLASH_ESCAPE_CHARS:
119-
# backslash escapes the next character from a special set
120-
last_arg.append(value[pos + 1])
121-
pos += 2
122-
continue
118+
if len(value) > pos + 1 and value[pos] == "\\":
119+
if value[pos + 1] in BACKSLASH_ESCAPE_CHARS:
120+
# backslash escapes the next character from a special set
121+
last_arg.append(value[pos + 1])
122+
pos += 2
123+
continue
124+
if value[pos + 1] == "\\":
125+
# backlash doesn't escape a backslash, but does prevent it from affecting the next char
126+
# a subsequent `shlex` pass will eat the double backslash during command splitting
127+
last_arg.append(value[pos : pos + 2])
128+
pos += 2
129+
continue
123130
fragment = value[pos:]
124131
if terminator and fragment.startswith(terminator):
125132
pos += len(terminator)

src/tox/config/loader/str_convert.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,43 @@ def to_dict(value: str, of_type: tuple[type[Any], type[Any]]) -> Iterator[tuple[
4545
else:
4646
raise TypeError(f"dictionary lines must be of form key=value, found {row!r}")
4747

48+
@staticmethod
49+
def _win32_process_path_backslash(value: str, escape: str, special_chars: str) -> str:
50+
"""
51+
Escape backslash in value that is not followed by a special character.
52+
53+
This allows windows paths to be written without double backslash, while
54+
retaining the POSIX backslash escape semantics for quotes and escapes.
55+
"""
56+
result = []
57+
for ix, char in enumerate(value):
58+
result.append(char)
59+
if char == escape:
60+
last_char = value[ix - 1 : ix]
61+
if last_char == escape:
62+
continue
63+
next_char = value[ix + 1 : ix + 2]
64+
if next_char not in (escape, *special_chars):
65+
result.append(escape) # escape escapes that are not themselves escaping a special character
66+
return "".join(result)
67+
4868
@staticmethod
4969
def to_command(value: str) -> Command:
50-
is_win = sys.platform == "win32"
70+
"""
71+
At this point, ``value`` has already been substituted out, and all punctuation / escapes are final.
72+
73+
Value will typically be stripped of whitespace when coming from an ini file.
74+
"""
5175
value = value.replace(r"\#", "#")
52-
splitter = shlex.shlex(value, posix=not is_win)
76+
is_win = sys.platform == "win32"
77+
if is_win: # pragma: win32 cover
78+
s = shlex.shlex(posix=True)
79+
value = StrConvert._win32_process_path_backslash(
80+
value,
81+
escape=s.escape,
82+
special_chars=s.quotes + s.whitespace,
83+
)
84+
splitter = shlex.shlex(value, posix=True)
5385
splitter.whitespace_split = True
5486
splitter.commenters = "" # comments handled earlier, and the shlex does not know escaped comment characters
5587
args: list[str] = []

tests/config/loader/ini/replace/test_replace_env_var.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,24 @@ def test_replace_env_set(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> N
1717

1818

1919
def test_replace_env_set_double_bs(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None:
20-
"""Double backslash should escape to single backslash and not affect surrounding replacements."""
20+
"""Double backslash should remain but not affect surrounding replacements."""
2121
monkeypatch.setenv("MAGIC", "something good")
2222
result = replace_one(r"{env:MAGIC}\\{env:MAGIC}")
23-
assert result == r"something good\something good"
23+
assert result == r"something good\\something good"
2424

2525

2626
def test_replace_env_set_triple_bs(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None:
27-
"""Triple backslash should escape to single backslash also escape subsequent replacement."""
27+
"""Triple backslash should retain two slashes with the third escaping subsequent replacement."""
2828
monkeypatch.setenv("MAGIC", "something good")
2929
result = replace_one(r"{env:MAGIC}\\\{env:MAGIC}")
30-
assert result == r"something good\{env:MAGIC}"
30+
assert result == r"something good\\{env:MAGIC}"
3131

3232

3333
def test_replace_env_set_quad_bs(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None:
34-
"""Quad backslash should escape to two backslashes and not affect surrounding replacements."""
34+
"""Quad backslash should remain but not affect surrounding replacements."""
3535
monkeypatch.setenv("MAGIC", "something good")
36-
result = replace_one(r"\\{env:MAGIC}\\\\{env:MAGIC}\\")
37-
assert result == r"\something good\\something good" + "\\"
36+
result = replace_one(r"\\{env:MAGIC}\\\\{env:MAGIC}" + "\\")
37+
assert result == r"\\something good\\\\something good" + "\\"
3838

3939

4040
def test_replace_env_when_value_is_backslash(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None:

tests/config/loader/test_str_convert.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
import sys
44
from pathlib import Path
5+
from textwrap import dedent
56
from typing import Any, Dict, List, Optional, Set, TypeVar, Union
67

78
import pytest
89

910
from tox.config.loader.str_convert import StrConvert
1011
from tox.config.types import Command, EnvList
12+
from tox.pytest import MonkeyPatch, SubRequest, ToxProjectCreator
1113

1214
if sys.version_info >= (3, 8): # pragma: no cover (py38+)
1315
from typing import Literal
@@ -80,3 +82,127 @@ def test_str_convert_nok(raw: str, of_type: type[Any], msg: str, exc_type: type[
8082
def test_invalid_shell_expression(value: str, expected: list[str]) -> None:
8183
result = StrConvert().to_command(value).args
8284
assert result == expected
85+
86+
87+
SIMPLE_ARGS = [
88+
('foo "bar baz"', ["foo", "bar baz"]),
89+
('foo "bar baz"ext', ["foo", "bar bazext"]),
90+
('foo="bar baz"', ["foo=bar baz"]),
91+
("foo 'bar baz'", ["foo", "bar baz"]),
92+
("foo 'bar baz'ext", ["foo", "bar bazext"]),
93+
("foo='bar baz'", ["foo=bar baz"]),
94+
(r"foo=\"bar baz\"", ['foo="bar', 'baz"']),
95+
(r'foo="bar baz\"', ['foo="bar baz\\"']),
96+
("foo='bar baz' quuc", ["foo=bar baz", "quuc"]),
97+
(r"foo='bar baz\' quuc", ["foo=bar baz\\", "quuc"]),
98+
(r"foo=\"bar baz\' quuc", ['foo="bar', "baz'", "quuc"]),
99+
(r"foo=\\\"bar baz\"", ['foo=\\"bar', 'baz"']),
100+
(r'foo=\\"bar baz\"', [r'foo=\\"bar baz\"']),
101+
]
102+
NEWLINE_ARGS = [
103+
('foo\n"bar\nbaz"', ["foo", "bar\nbaz"]),
104+
]
105+
INI_CONFIG_NEWLINE_ARGS = [
106+
('foo\\\n "bar\\\n baz"', ["foobarbaz"]), # behavior change from tox 3
107+
('foo\\\n "bar \\\n baz"', ["foobar baz"]), # behavior change from tox 3
108+
('foo \\\n "bar\\\n baz"', ["foo", "barbaz"]),
109+
('foo \\\n "bar \\\n baz"', ["foo", "bar baz"]),
110+
('foo \\\n \\"bar \\\n baz"', ["foo", '"bar', 'baz"']),
111+
("foo \\\n bar \\\n baz", ["foo", "bar", "baz"]),
112+
]
113+
WINDOWS_PATH_ARGS = [
114+
(r"SPECIAL:\foo\bar --quuz='baz atan'", [r"SPECIAL:\foo\bar", "--quuz=baz atan"]),
115+
(r"X:\\foo\\bar --quuz='baz atan'", [r"X:\foo\bar", "--quuz=baz atan"]),
116+
("/foo/bar --quuz='baz atan'", ["/foo/bar", "--quuz=baz atan"]),
117+
('cc --arg "C:\\\\Users\\""', ["cc", "--arg", 'C:\\Users"']),
118+
('cc --arg "C:\\\\Users\\"', ["cc", "--arg", '"C:\\\\Users\\"']),
119+
('cc --arg "C:\\\\Users"', ["cc", "--arg", "C:\\Users"]),
120+
('cc --arg \\"C:\\\\Users"', ["cc", "--arg", '\\"C:\\\\Users"']),
121+
('cc --arg "C:\\\\Users\\ "', ["cc", "--arg", "C:\\Users\\ "]),
122+
# ('cc --arg "C:\\\\Users\\ ', ["cc", "--arg", '"C:\\\\Users\\ ']),
123+
('cc --arg "C:\\\\Users\\\\"', ["cc", "--arg", "C:\\Users\\"]),
124+
('cc --arg "C:\\\\Users\\\\ "', ["cc", "--arg", "C:\\Users\\ "]),
125+
# ('cc --arg "C:\\\\Users\\\\ ', ["cc", "--arg", '"C:\\\\Users\\\\ ']),
126+
(
127+
r'cc --arg C:\\Users\\ --arg2 "SPECIAL:\Temp\f o o" --arg3="\\FOO\share\Path name" --arg4 SPECIAL:\Temp\ '[:-1],
128+
[
129+
"cc",
130+
"--arg",
131+
"C:\\Users\\",
132+
"--arg2",
133+
"SPECIAL:\\Temp\\f o o",
134+
"--arg3=\\FOO\\share\\Path name",
135+
"--arg4",
136+
"SPECIAL:\\Temp\\",
137+
],
138+
),
139+
]
140+
WACKY_SLASH_ARGS = [
141+
("\\\\\\", ["\\\\\\"]),
142+
(" \\'\\'\\ '", [" \\'\\'\\ '"]),
143+
("\\'\\'\\ ", ["'' "]),
144+
("\\'\\ \\\\", ["' \\"]),
145+
("\\'\\ ", ["' "]),
146+
('''"\\'\\"''', ['"\\\'\\"']),
147+
("'\\' \\\\", ["\\", "\\"]),
148+
('"\\\\" \\\\', ["\\", "\\"]),
149+
]
150+
151+
152+
@pytest.fixture(params=["win32", "linux2"])
153+
def sys_platform(request: SubRequest, monkeypatch: MonkeyPatch) -> str:
154+
monkeypatch.setattr(sys, "platform", request.param)
155+
return str(request.param)
156+
157+
158+
@pytest.mark.parametrize(
159+
("value", "expected"),
160+
[
161+
*SIMPLE_ARGS,
162+
*NEWLINE_ARGS,
163+
*WINDOWS_PATH_ARGS,
164+
*WACKY_SLASH_ARGS,
165+
],
166+
)
167+
def test_shlex_platform_specific(sys_platform: str, value: str, expected: list[str]) -> None:
168+
if sys_platform != "win32" and value.startswith("SPECIAL:"):
169+
# on non-windows platform, backslash is always an escape, not path separator
170+
expected = [exp.replace("\\", "") for exp in expected]
171+
result = StrConvert().to_command(value).args
172+
assert result == expected
173+
174+
175+
@pytest.mark.parametrize(
176+
("value", "expected"),
177+
[
178+
*SIMPLE_ARGS,
179+
*INI_CONFIG_NEWLINE_ARGS,
180+
*WINDOWS_PATH_ARGS,
181+
# *WACKY_SLASH_ARGS,
182+
],
183+
)
184+
def test_shlex_platform_specific_ini(
185+
tox_project: ToxProjectCreator,
186+
sys_platform: str,
187+
value: str,
188+
expected: list[str],
189+
) -> None:
190+
if sys_platform != "win32" and value.startswith("SPECIAL:"):
191+
# on non-windows platform, backslash is always an escape, not path separator
192+
expected = [exp.replace("\\", "") for exp in expected]
193+
project = tox_project(
194+
{
195+
"tox.ini": dedent(
196+
"""
197+
[testenv]
198+
commands =
199+
%s""",
200+
)
201+
% value,
202+
},
203+
)
204+
outcome = project.run("c")
205+
outcome.assert_success()
206+
env_config = outcome.env_conf("py")
207+
result = env_config["commands"]
208+
assert result == [Command(args=expected)]

0 commit comments

Comments
 (0)