Skip to content

Commit d5a8d4b

Browse files
[3.12] gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909) (GH-120939)
Now the null character is always represented as \xc0\x80 for Tcl_NewStringObj().

(cherry picked from commit c38e2f6)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 3afb856 commit d5a8d4b

File tree

4 files changed, with 68 additions and 7 deletions.

Lib/test/test_tcl.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,18 @@ def testCall(self):
7373
tcl.call('set','a','1')
7474
self.assertEqual(tcl.call('set','a'),'1')
7575

76+
def test_call_passing_null(self):
77+
tcl = self.interp
78+
tcl.call('set', 'a', 'a\0b') # ASCII-only
79+
self.assertEqual(tcl.getvar('a'), 'a\x00b')
80+
self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
81+
self.assertEqual(tcl.eval('set a'), 'a\x00b')
82+
83+
tcl.call('set', 'a', '\u20ac\0') # non-ASCII
84+
self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
85+
self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
86+
self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
87+
7688
def testCallException(self):
7789
tcl = self.interp
7890
self.assertRaises(TclError,tcl.call,'set','a')
@@ -98,6 +110,18 @@ def testSetVar(self):
98110
tcl.setvar('a','1')
99111
self.assertEqual(tcl.eval('set a'),'1')
100112

113+
def test_setvar_passing_null(self):
114+
tcl = self.interp
115+
tcl.setvar('a', 'a\0b') # ASCII-only
116+
self.assertEqual(tcl.getvar('a'), 'a\x00b')
117+
self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
118+
self.assertEqual(tcl.eval('set a'), 'a\x00b')
119+
120+
tcl.setvar('a', '\u20ac\0') # non-ASCII
121+
self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
122+
self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
123+
self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
124+
101125
def testSetVarArray(self):
102126
tcl = self.interp
103127
tcl.setvar('a(1)','1')

Lib/test/test_tkinter/test_misc.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -382,6 +382,15 @@ def test_info_patchlevel(self):
382382
self.assertEqual(vi.micro, 0)
383383
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))
384384

385+
def test_embedded_null(self):
386+
widget = tkinter.Entry(self.root)
387+
widget.insert(0, 'abc\0def') # ASCII-only
388+
widget.selection_range(0, 'end')
389+
self.assertEqual(widget.selection_get(), 'abc\x00def')
390+
widget.insert(0, '\u20ac\0') # non-ASCII
391+
widget.selection_range(0, 'end')
392+
self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def')
393+
385394

386395
class EventTest(AbstractTkTest, unittest.TestCase):
387396

[Newly added NEWS entry file; its path was not preserved in this capture]

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix truncation of strings with embedded null characters in some internal
2+
operations in :mod:`tkinter`.

Modules/_tkinter.c

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -516,7 +516,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
516516
else
517517
Py_UNREACHABLE();
518518
}
519-
#endif
519+
#endif /* USE_TCL_UNICODE */
520520
const char *s = Tcl_GetStringFromObj(value, &len);
521521
return unicodeFromTclStringAndSize(s, len);
522522
}
@@ -1024,7 +1024,9 @@ AsObj(PyObject *value)
10241024
PyErr_SetString(PyExc_OverflowError, "string is too long");
10251025
return NULL;
10261026
}
1027-
if (PyUnicode_IS_ASCII(value)) {
1027+
if (PyUnicode_IS_ASCII(value) &&
1028+
strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
1029+
{
10281030
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
10291031
(int)size);
10301032
}
@@ -1039,9 +1041,6 @@ AsObj(PyObject *value)
10391041
"surrogatepass", NATIVE_BYTEORDER);
10401042
else
10411043
Py_UNREACHABLE();
1042-
#else
1043-
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
1044-
#endif
10451044
if (!encoded) {
10461045
return NULL;
10471046
}
@@ -1051,12 +1050,39 @@ AsObj(PyObject *value)
10511050
PyErr_SetString(PyExc_OverflowError, "string is too long");
10521051
return NULL;
10531052
}
1054-
#if USE_TCL_UNICODE
10551053
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
10561054
(int)(size / sizeof(Tcl_UniChar)));
10571055
#else
1056+
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
1057+
if (!encoded) {
1058+
return NULL;
1059+
}
1060+
size = PyBytes_GET_SIZE(encoded);
1061+
if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
1062+
/* The string contains embedded null characters.
1063+
* Tcl needs a null character to be represented as \xc0\x80 in
1064+
* the Modified UTF-8 encoding. Otherwise the string can be
1065+
* truncated in some internal operations.
1066+
*
1067+
* NOTE: stringlib_replace() could be used here, but optimizing
1068+
* this obscure case isn't worth it unless stringlib_replace()
1069+
* was already exposed in the C API for other reasons. */
1070+
Py_SETREF(encoded,
1071+
PyObject_CallMethod(encoded, "replace", "y#y#",
1072+
"\0", (Py_ssize_t)1,
1073+
"\xc0\x80", (Py_ssize_t)2));
1074+
if (!encoded) {
1075+
return NULL;
1076+
}
1077+
size = PyBytes_GET_SIZE(encoded);
1078+
}
1079+
if (size > INT_MAX) {
1080+
Py_DECREF(encoded);
1081+
PyErr_SetString(PyExc_OverflowError, "string is too long");
1082+
return NULL;
1083+
}
10581084
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
1059-
#endif
1085+
#endif /* USE_TCL_UNICODE */
10601086
Py_DECREF(encoded);
10611087
return result;
10621088
}

0 commit comments

Comments
 (0)