Skip to content

Commit 689830e

Browse files
authored
bpo-37412: os.getcwdb() now uses UTF-8 on Windows (GH-14396)
The os.getcwdb() function now uses the UTF-8 encoding on Windows, rather than the ANSI code page: see PEP 529 for the rationale. The function is no longer deprecated on Windows. os.getcwd() and os.getcwdb() now detect integer overflow on memory allocations. On Unix, these functions properly report MemoryError on memory allocation failure.
1 parent c6a2320 commit 689830e

File tree

5 files changed

+88
-59
lines changed

5 files changed

+88
-59
lines changed

Doc/library/os.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,6 +1730,11 @@ features:
17301730

17311731
Return a bytestring representing the current working directory.
17321732

1733+
.. versionchanged:: 3.8
1734+
The function now uses the UTF-8 encoding on Windows, rather than the ANSI
1735+
code page: see :pep:`529` for the rationale. The function is no longer
1736+
deprecated on Windows.
1737+
17331738

17341739
.. function:: lchflags(path, flags)
17351740

Doc/whatsnew/3.8.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,11 @@ Changes in Python behavior
12311231
Changes in the Python API
12321232
-------------------------
12331233

1234+
* The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
1235+
rather than the ANSI code page: see :pep:`529` for the rationale. The
1236+
function is no longer deprecated on Windows.
1237+
(Contributed by Victor Stinner in :issue:`37412`.)
1238+
12341239
* :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
12351240
for better performance. On Windows Subsystem for Linux and QEMU User
12361241
Emulation, Popen constructor using :func:`os.posix_spawn` no longer raise an

Lib/test/test_os.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,17 @@ def create_file(filename, content=b'content'):
8282
fp.write(content)
8383

8484

85+
class MiscTests(unittest.TestCase):
86+
def test_getcwd(self):
87+
cwd = os.getcwd()
88+
self.assertIsInstance(cwd, str)
89+
90+
def test_getcwdb(self):
91+
cwd = os.getcwdb()
92+
self.assertIsInstance(cwd, bytes)
93+
self.assertEqual(os.fsdecode(cwd), os.getcwd())
94+
95+
8596
# Tests creating TESTFN
8697
class FileTests(unittest.TestCase):
8798
def setUp(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
2+
rather than the ANSI code page: see :pep:`529` for the rationale. The function
3+
is no longer deprecated on Windows.

Modules/posixmodule.c

Lines changed: 64 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -506,17 +506,6 @@ void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
506506
ULONG, struct _Py_stat_struct *);
507507
#endif
508508

509-
#ifdef MS_WINDOWS
510-
static int
511-
win32_warn_bytes_api()
512-
{
513-
return PyErr_WarnEx(PyExc_DeprecationWarning,
514-
"The Windows bytes API has been deprecated, "
515-
"use Unicode filenames instead",
516-
1);
517-
}
518-
#endif
519-
520509

521510
#ifndef MS_WINDOWS
522511
PyObject *
@@ -3334,83 +3323,99 @@ os_lchown_impl(PyObject *module, path_t *path, uid_t uid, gid_t gid)
33343323
static PyObject *
33353324
posix_getcwd(int use_bytes)
33363325
{
3337-
char *buf, *tmpbuf;
3338-
char *cwd;
3339-
const size_t chunk = 1024;
3340-
size_t buflen = 0;
3341-
PyObject *obj;
3342-
33433326
#ifdef MS_WINDOWS
3344-
if (!use_bytes) {
3345-
wchar_t wbuf[MAXPATHLEN];
3346-
wchar_t *wbuf2 = wbuf;
3347-
PyObject *resobj;
3348-
DWORD len;
3349-
Py_BEGIN_ALLOW_THREADS
3350-
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
3351-
/* If the buffer is large enough, len does not include the
3352-
terminating \0. If the buffer is too small, len includes
3353-
the space needed for the terminator. */
3354-
if (len >= Py_ARRAY_LENGTH(wbuf)) {
3327+
wchar_t wbuf[MAXPATHLEN];
3328+
wchar_t *wbuf2 = wbuf;
3329+
DWORD len;
3330+
3331+
Py_BEGIN_ALLOW_THREADS
3332+
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
3333+
/* If the buffer is large enough, len does not include the
3334+
terminating \0. If the buffer is too small, len includes
3335+
the space needed for the terminator. */
3336+
if (len >= Py_ARRAY_LENGTH(wbuf)) {
3337+
if (len <= PY_SSIZE_T_MAX / sizeof(wchar_t)) {
33553338
wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
3356-
if (wbuf2)
3357-
len = GetCurrentDirectoryW(len, wbuf2);
33583339
}
3359-
Py_END_ALLOW_THREADS
3360-
if (!wbuf2) {
3361-
PyErr_NoMemory();
3362-
return NULL;
3340+
else {
3341+
wbuf2 = NULL;
33633342
}
3364-
if (!len) {
3365-
if (wbuf2 != wbuf)
3366-
PyMem_RawFree(wbuf2);
3367-
return PyErr_SetFromWindowsErr(0);
3343+
if (wbuf2) {
3344+
len = GetCurrentDirectoryW(len, wbuf2);
33683345
}
3369-
resobj = PyUnicode_FromWideChar(wbuf2, len);
3346+
}
3347+
Py_END_ALLOW_THREADS
3348+
3349+
if (!wbuf2) {
3350+
PyErr_NoMemory();
3351+
return NULL;
3352+
}
3353+
if (!len) {
33703354
if (wbuf2 != wbuf)
33713355
PyMem_RawFree(wbuf2);
3372-
return resobj;
3356+
return PyErr_SetFromWindowsErr(0);
33733357
}
33743358

3375-
if (win32_warn_bytes_api())
3376-
return NULL;
3377-
#endif
3359+
PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len);
3360+
if (wbuf2 != wbuf) {
3361+
PyMem_RawFree(wbuf2);
3362+
}
3363+
3364+
if (use_bytes) {
3365+
if (resobj == NULL) {
3366+
return NULL;
3367+
}
3368+
Py_SETREF(resobj, PyUnicode_EncodeFSDefault(resobj));
3369+
}
3370+
3371+
return resobj;
3372+
#else
3373+
const size_t chunk = 1024;
3374+
3375+
char *buf = NULL;
3376+
char *cwd = NULL;
3377+
size_t buflen = 0;
33783378

3379-
buf = cwd = NULL;
33803379
Py_BEGIN_ALLOW_THREADS
33813380
do {
3382-
buflen += chunk;
3383-
#ifdef MS_WINDOWS
3384-
if (buflen > INT_MAX) {
3385-
PyErr_NoMemory();
3386-
break;
3381+
char *newbuf;
3382+
if (buflen <= PY_SSIZE_T_MAX - chunk) {
3383+
buflen += chunk;
3384+
newbuf = PyMem_RawRealloc(buf, buflen);
33873385
}
3388-
#endif
3389-
tmpbuf = PyMem_RawRealloc(buf, buflen);
3390-
if (tmpbuf == NULL)
3386+
else {
3387+
newbuf = NULL;
3388+
}
3389+
if (newbuf == NULL) {
3390+
PyMem_RawFree(buf);
3391+
buf = NULL;
33913392
break;
3393+
}
3394+
buf = newbuf;
33923395

3393-
buf = tmpbuf;
3394-
#ifdef MS_WINDOWS
3395-
cwd = getcwd(buf, (int)buflen);
3396-
#else
33973396
cwd = getcwd(buf, buflen);
3398-
#endif
33993397
} while (cwd == NULL && errno == ERANGE);
34003398
Py_END_ALLOW_THREADS
34013399

3400+
if (buf == NULL) {
3401+
return PyErr_NoMemory();
3402+
}
34023403
if (cwd == NULL) {
34033404
PyMem_RawFree(buf);
34043405
return posix_error();
34053406
}
34063407

3407-
if (use_bytes)
3408+
PyObject *obj;
3409+
if (use_bytes) {
34083410
obj = PyBytes_FromStringAndSize(buf, strlen(buf));
3409-
else
3411+
}
3412+
else {
34103413
obj = PyUnicode_DecodeFSDefault(buf);
3414+
}
34113415
PyMem_RawFree(buf);
34123416

34133417
return obj;
3418+
#endif /* !MS_WINDOWS */
34143419
}
34153420

34163421

0 commit comments

Comments
 (0)