Skip to content

Commit 63429c8

Browse files
bpo-37412: os.getcwdb() now uses UTF-8 on Windows (GH-14396)
The os.getcwdb() function now uses the UTF-8 encoding on Windows, rather than the ANSI code page: see PEP 529 for the rationale. The function is no longer deprecated on Windows. os.getcwd() and os.getcwdb() now detect integer overflow on memory allocations. On Unix, these functions properly report MemoryError on memory allocation failure. (cherry picked from commit 689830e) Co-authored-by: Victor Stinner <[email protected]>
1 parent dd4edbc commit 63429c8

File tree

5 files changed

+88
-59
lines changed

5 files changed

+88
-59
lines changed

Doc/library/os.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,6 +1730,11 @@ features:
17301730

17311731
Return a bytestring representing the current working directory.
17321732

1733+
.. versionchanged:: 3.8
1734+
The function now uses the UTF-8 encoding on Windows, rather than the ANSI
1735+
code page: see :pep:`529` for the rationale. The function is no longer
1736+
deprecated on Windows.
1737+
17331738

17341739
.. function:: lchflags(path, flags)
17351740

Doc/whatsnew/3.8.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,6 +1228,11 @@ Changes in Python behavior
12281228
Changes in the Python API
12291229
-------------------------
12301230

1231+
* The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
1232+
rather than the ANSI code page: see :pep:`529` for the rationale. The
1233+
function is no longer deprecated on Windows.
1234+
(Contributed by Victor Stinner in :issue:`37412`.)
1235+
12311236
* :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
12321237
for better performance. On Windows Subsystem for Linux and QEMU User
12331238
Emulation, Popen constructor using :func:`os.posix_spawn` no longer raise an

Lib/test/test_os.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,17 @@ def create_file(filename, content=b'content'):
8282
fp.write(content)
8383

8484

85+
class MiscTests(unittest.TestCase):
86+
def test_getcwd(self):
87+
cwd = os.getcwd()
88+
self.assertIsInstance(cwd, str)
89+
90+
def test_getcwdb(self):
91+
cwd = os.getcwdb()
92+
self.assertIsInstance(cwd, bytes)
93+
self.assertEqual(os.fsdecode(cwd), os.getcwd())
94+
95+
8596
# Tests creating TESTFN
8697
class FileTests(unittest.TestCase):
8798
def setUp(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
2+
rather than the ANSI code page: see :pep:`529` for the rationale. The function
3+
is no longer deprecated on Windows.

Modules/posixmodule.c

Lines changed: 64 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -505,17 +505,6 @@ void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
505505
ULONG, struct _Py_stat_struct *);
506506
#endif
507507

508-
#ifdef MS_WINDOWS
509-
static int
510-
win32_warn_bytes_api()
511-
{
512-
return PyErr_WarnEx(PyExc_DeprecationWarning,
513-
"The Windows bytes API has been deprecated, "
514-
"use Unicode filenames instead",
515-
1);
516-
}
517-
#endif
518-
519508

520509
#ifndef MS_WINDOWS
521510
PyObject *
@@ -3333,83 +3322,99 @@ os_lchown_impl(PyObject *module, path_t *path, uid_t uid, gid_t gid)
33333322
static PyObject *
33343323
posix_getcwd(int use_bytes)
33353324
{
3336-
char *buf, *tmpbuf;
3337-
char *cwd;
3338-
const size_t chunk = 1024;
3339-
size_t buflen = 0;
3340-
PyObject *obj;
3341-
33423325
#ifdef MS_WINDOWS
3343-
if (!use_bytes) {
3344-
wchar_t wbuf[MAXPATHLEN];
3345-
wchar_t *wbuf2 = wbuf;
3346-
PyObject *resobj;
3347-
DWORD len;
3348-
Py_BEGIN_ALLOW_THREADS
3349-
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
3350-
/* If the buffer is large enough, len does not include the
3351-
terminating \0. If the buffer is too small, len includes
3352-
the space needed for the terminator. */
3353-
if (len >= Py_ARRAY_LENGTH(wbuf)) {
3326+
wchar_t wbuf[MAXPATHLEN];
3327+
wchar_t *wbuf2 = wbuf;
3328+
DWORD len;
3329+
3330+
Py_BEGIN_ALLOW_THREADS
3331+
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
3332+
/* If the buffer is large enough, len does not include the
3333+
terminating \0. If the buffer is too small, len includes
3334+
the space needed for the terminator. */
3335+
if (len >= Py_ARRAY_LENGTH(wbuf)) {
3336+
if (len <= PY_SSIZE_T_MAX / sizeof(wchar_t)) {  /* allocate only when len * sizeof(wchar_t) cannot overflow */
33543337
wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
3355-
if (wbuf2)
3356-
len = GetCurrentDirectoryW(len, wbuf2);
33573338
}
3358-
Py_END_ALLOW_THREADS
3359-
if (!wbuf2) {
3360-
PyErr_NoMemory();
3361-
return NULL;
3339+
else {
3340+
wbuf2 = NULL;
33623341
}
3363-
if (!len) {
3364-
if (wbuf2 != wbuf)
3365-
PyMem_RawFree(wbuf2);
3366-
return PyErr_SetFromWindowsErr(0);
3342+
if (wbuf2) {
3343+
len = GetCurrentDirectoryW(len, wbuf2);
33673344
}
3368-
resobj = PyUnicode_FromWideChar(wbuf2, len);
3345+
}
3346+
Py_END_ALLOW_THREADS
3347+
3348+
if (!wbuf2) {
3349+
PyErr_NoMemory();
3350+
return NULL;
3351+
}
3352+
if (!len) {
33693353
if (wbuf2 != wbuf)
33703354
PyMem_RawFree(wbuf2);
3371-
return resobj;
3355+
return PyErr_SetFromWindowsErr(0);
33723356
}
33733357

3374-
if (win32_warn_bytes_api())
3375-
return NULL;
3376-
#endif
3358+
PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len);
3359+
if (wbuf2 != wbuf) {
3360+
PyMem_RawFree(wbuf2);
3361+
}
3362+
3363+
if (use_bytes) {
3364+
if (resobj == NULL) {
3365+
return NULL;
3366+
}
3367+
Py_SETREF(resobj, PyUnicode_EncodeFSDefault(resobj));
3368+
}
3369+
3370+
return resobj;
3371+
#else
3372+
const size_t chunk = 1024;
3373+
3374+
char *buf = NULL;
3375+
char *cwd = NULL;
3376+
size_t buflen = 0;
33773377

3378-
buf = cwd = NULL;
33793378
Py_BEGIN_ALLOW_THREADS
33803379
do {
3381-
buflen += chunk;
3382-
#ifdef MS_WINDOWS
3383-
if (buflen > INT_MAX) {
3384-
PyErr_NoMemory();
3385-
break;
3380+
char *newbuf;
3381+
if (buflen <= PY_SSIZE_T_MAX - chunk) {
3382+
buflen += chunk;
3383+
newbuf = PyMem_RawRealloc(buf, buflen);
33863384
}
3387-
#endif
3388-
tmpbuf = PyMem_RawRealloc(buf, buflen);
3389-
if (tmpbuf == NULL)
3385+
else {
3386+
newbuf = NULL;
3387+
}
3388+
if (newbuf == NULL) {
3389+
PyMem_RawFree(buf);
3390+
buf = NULL;
33903391
break;
3392+
}
3393+
buf = newbuf;
33913394

3392-
buf = tmpbuf;
3393-
#ifdef MS_WINDOWS
3394-
cwd = getcwd(buf, (int)buflen);
3395-
#else
33963395
cwd = getcwd(buf, buflen);
3397-
#endif
33983396
} while (cwd == NULL && errno == ERANGE);
33993397
Py_END_ALLOW_THREADS
34003398

3399+
if (buf == NULL) {
3400+
return PyErr_NoMemory();
3401+
}
34013402
if (cwd == NULL) {
34023403
PyMem_RawFree(buf);
34033404
return posix_error();
34043405
}
34053406

3406-
if (use_bytes)
3407+
PyObject *obj;
3408+
if (use_bytes) {
34073409
obj = PyBytes_FromStringAndSize(buf, strlen(buf));
3408-
else
3410+
}
3411+
else {
34093412
obj = PyUnicode_DecodeFSDefault(buf);
3413+
}
34103414
PyMem_RawFree(buf);
34113415

34123416
return obj;
3417+
#endif /* !MS_WINDOWS */
34133418
}
34143419

34153420

0 commit comments

Comments
 (0)