Skip to content

Commit 9b889b5

Browse files
authored
bpo-46315: Use fopencookie() to avoid dup() in _PyTokenizer_FindEncodingFilename (GH-32033)
WASI does not have dup() and Emscripten's emulation is slow.
1 parent e03db6d commit 9b889b5

File tree

1 file changed

+34
-6
lines changed

1 file changed

+34
-6
lines changed

Parser/tokenizer.c

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,6 +2072,39 @@ _PyTokenizer_Get(struct tok_state *tok,
20722072
return result;
20732073
}
20742074

2075+
#if defined(__wasi__) || defined(__EMSCRIPTEN__)
2076+
// fdopen() with borrowed fd. WASI does not provide dup() and Emscripten's
2077+
// dup() emulation with open() is slow.
2078+
/* Overlay used to pass a file descriptor through the opaque `void *` cookie
 * that fopencookie() hands back to its callbacks: fdopen_borrow() fills in
 * `fd` and passes `cookie`, borrow_read() fills in `cookie` and reads `fd`.
 * Always initialize it with a designated initializer naming the member. */
typedef union {
    int fd;        /* the borrowed descriptor */
    void *cookie;  /* the same storage, viewed as the stream cookie */
} borrowed;
2082+
2083+
static ssize_t
2084+
borrow_read(void *cookie, char *buf, size_t size)
2085+
{
2086+
borrowed b = {.cookie = cookie};
2087+
return read(b.fd, (void *)buf, size);
2088+
}
2089+
2090+
static FILE *
2091+
fdopen_borrow(int fd) {
2092+
// supports only reading. seek fails. close and write are no-ops.
2093+
cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL};
2094+
borrowed b = {.fd = fd};
2095+
return fopencookie(b.cookie, "r", io_cb);
2096+
}
2097+
#else
2098+
/* Open a read-only FILE stream over a duplicate of `fd`.
 *
 * The stream is built on the descriptor returned by _Py_dup(), not on `fd`
 * itself, so the caller's descriptor is never handed to fdopen().
 *
 * Returns the new stream, or NULL if _Py_dup() reports failure (negative
 * result) or fdopen() fails.
 */
static FILE *
fdopen_borrow(int fd) {
    int dup_fd = _Py_dup(fd);
    if (dup_fd < 0) {
        return NULL;
    }

    FILE *fp = fdopen(dup_fd, "r");
    if (fp == NULL) {
        // fdopen() leaves the descriptor open when it fails, and the caller
        // only sees NULL, so release the duplicate here to avoid leaking it.
        close(dup_fd);
    }
    return fp;
}
2106+
#endif
2107+
20752108
/* Get the encoding of a Python file. Check for the coding cookie and check if
20762109
the file starts with a BOM.
20772110
@@ -2091,12 +2124,7 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
20912124
const char *p_end = NULL;
20922125
char *encoding = NULL;
20932126

2094-
fd = _Py_dup(fd);
2095-
if (fd < 0) {
2096-
return NULL;
2097-
}
2098-
2099-
fp = fdopen(fd, "r");
2127+
fp = fdopen_borrow(fd);
21002128
if (fp == NULL) {
21012129
return NULL;
21022130
}

0 commit comments

Comments
 (0)