|
3 | 3 | #include "pycore_ast.h" // _PyAST_GetDocString()
|
4 | 4 | #include "pycore_compile.h" // _PyASTOptimizeState
|
5 | 5 | #include "pycore_pystate.h" // _PyThreadState_GET()
|
| 6 | +#include "pycore_format.h" // F_LJUST |
6 | 7 |
|
7 | 8 |
|
8 | 9 | static int
|
@@ -224,17 +225,235 @@ safe_mod(PyObject *v, PyObject *w)
|
224 | 225 | return PyNumber_Remainder(v, w);
|
225 | 226 | }
|
226 | 227 |
|
| 228 | + |
| 229 | +static expr_ty |
| 230 | +parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena) |
| 231 | +{ |
| 232 | + const void *data = PyUnicode_DATA(fmt); |
| 233 | + int kind = PyUnicode_KIND(fmt); |
| 234 | + Py_ssize_t size = PyUnicode_GET_LENGTH(fmt); |
| 235 | + Py_ssize_t start, pos; |
| 236 | + int has_percents = 0; |
| 237 | + start = pos = *ppos; |
| 238 | + while (pos < size) { |
| 239 | + if (PyUnicode_READ(kind, data, pos) != '%') { |
| 240 | + pos++; |
| 241 | + } |
| 242 | + else if (pos+1 < size && PyUnicode_READ(kind, data, pos+1) == '%') { |
| 243 | + has_percents = 1; |
| 244 | + pos += 2; |
| 245 | + } |
| 246 | + else { |
| 247 | + break; |
| 248 | + } |
| 249 | + } |
| 250 | + *ppos = pos; |
| 251 | + if (pos == start) { |
| 252 | + return NULL; |
| 253 | + } |
| 254 | + PyObject *str = PyUnicode_Substring(fmt, start, pos); |
| 255 | + /* str = str.replace('%%', '%') */ |
| 256 | + if (str && has_percents) { |
| 257 | + _Py_static_string(PyId_double_percent, "%%"); |
| 258 | + _Py_static_string(PyId_percent, "%"); |
| 259 | + PyObject *double_percent = _PyUnicode_FromId(&PyId_double_percent); |
| 260 | + PyObject *percent = _PyUnicode_FromId(&PyId_percent); |
| 261 | + if (!double_percent || !percent) { |
| 262 | + Py_DECREF(str); |
| 263 | + return NULL; |
| 264 | + } |
| 265 | + Py_SETREF(str, PyUnicode_Replace(str, double_percent, percent, -1)); |
| 266 | + } |
| 267 | + if (!str) { |
| 268 | + return NULL; |
| 269 | + } |
| 270 | + |
| 271 | + if (_PyArena_AddPyObject(arena, str) < 0) { |
| 272 | + Py_DECREF(str); |
| 273 | + return NULL; |
| 274 | + } |
| 275 | + return _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena); |
| 276 | +} |
| 277 | + |
| 278 | +#define MAXDIGITS 3 |
| 279 | + |
| 280 | +static int |
| 281 | +simple_format_arg_parse(PyObject *fmt, Py_ssize_t *ppos, |
| 282 | + int *spec, int *flags, int *width, int *prec) |
| 283 | +{ |
| 284 | + Py_ssize_t pos = *ppos, len = PyUnicode_GET_LENGTH(fmt); |
| 285 | + Py_UCS4 ch; |
| 286 | + |
| 287 | +#define NEXTC do { \ |
| 288 | + if (pos >= len) { \ |
| 289 | + return 0; \ |
| 290 | + } \ |
| 291 | + ch = PyUnicode_READ_CHAR(fmt, pos); \ |
| 292 | + pos++; \ |
| 293 | +} while (0) |
| 294 | + |
| 295 | + *flags = 0; |
| 296 | + while (1) { |
| 297 | + NEXTC; |
| 298 | + switch (ch) { |
| 299 | + case '-': *flags |= F_LJUST; continue; |
| 300 | + case '+': *flags |= F_SIGN; continue; |
| 301 | + case ' ': *flags |= F_BLANK; continue; |
| 302 | + case '#': *flags |= F_ALT; continue; |
| 303 | + case '0': *flags |= F_ZERO; continue; |
| 304 | + } |
| 305 | + break; |
| 306 | + } |
| 307 | + if ('0' <= ch && ch <= '9') { |
| 308 | + *width = 0; |
| 309 | + int digits = 0; |
| 310 | + while ('0' <= ch && ch <= '9') { |
| 311 | + *width = *width * 10 + (ch - '0'); |
| 312 | + NEXTC; |
| 313 | + if (++digits >= MAXDIGITS) { |
| 314 | + return 0; |
| 315 | + } |
| 316 | + } |
| 317 | + } |
| 318 | + |
| 319 | + if (ch == '.') { |
| 320 | + NEXTC; |
| 321 | + if ('0' <= ch && ch <= '9') { |
| 322 | + *prec = 0; |
| 323 | + int digits = 0; |
| 324 | + while ('0' <= ch && ch <= '9') { |
| 325 | + *prec = *prec * 10 + (ch - '0'); |
| 326 | + NEXTC; |
| 327 | + if (++digits >= MAXDIGITS) { |
| 328 | + return 0; |
| 329 | + } |
| 330 | + } |
| 331 | + } |
| 332 | + } |
| 333 | + *spec = ch; |
| 334 | + *ppos = pos; |
| 335 | + return 1; |
| 336 | + |
| 337 | +#undef NEXTC |
| 338 | +} |
| 339 | + |
| 340 | +static expr_ty |
| 341 | +parse_format(PyObject *fmt, Py_ssize_t *ppos, expr_ty arg, PyArena *arena) |
| 342 | +{ |
| 343 | + int spec, flags, width = -1, prec = -1; |
| 344 | + if (!simple_format_arg_parse(fmt, ppos, &spec, &flags, &width, &prec)) { |
| 345 | + // Unsupported format. |
| 346 | + return NULL; |
| 347 | + } |
| 348 | + if (spec == 's' || spec == 'r' || spec == 'a') { |
| 349 | + char buf[1 + MAXDIGITS + 1 + MAXDIGITS + 1], *p = buf; |
| 350 | + if (!(flags & F_LJUST) && width > 0) { |
| 351 | + *p++ = '>'; |
| 352 | + } |
| 353 | + if (width >= 0) { |
| 354 | + p += snprintf(p, MAXDIGITS + 1, "%d", width); |
| 355 | + } |
| 356 | + if (prec >= 0) { |
| 357 | + p += snprintf(p, MAXDIGITS + 2, ".%d", prec); |
| 358 | + } |
| 359 | + expr_ty format_spec = NULL; |
| 360 | + if (p != buf) { |
| 361 | + PyObject *str = PyUnicode_FromString(buf); |
| 362 | + if (str == NULL) { |
| 363 | + return NULL; |
| 364 | + } |
| 365 | + if (_PyArena_AddPyObject(arena, str) < 0) { |
| 366 | + Py_DECREF(str); |
| 367 | + return NULL; |
| 368 | + } |
| 369 | + format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena); |
| 370 | + if (format_spec == NULL) { |
| 371 | + return NULL; |
| 372 | + } |
| 373 | + } |
| 374 | + return _PyAST_FormattedValue(arg, spec, format_spec, |
| 375 | + arg->lineno, arg->col_offset, |
| 376 | + arg->end_lineno, arg->end_col_offset, |
| 377 | + arena); |
| 378 | + } |
| 379 | + // Unsupported format. |
| 380 | + return NULL; |
| 381 | +} |
| 382 | + |
| 383 | +static int |
| 384 | +optimize_format(expr_ty node, PyObject *fmt, asdl_expr_seq *elts, PyArena *arena) |
| 385 | +{ |
| 386 | + Py_ssize_t pos = 0; |
| 387 | + Py_ssize_t cnt = 0; |
| 388 | + asdl_expr_seq *seq = _Py_asdl_expr_seq_new(asdl_seq_LEN(elts) * 2 + 1, arena); |
| 389 | + if (!seq) { |
| 390 | + return 0; |
| 391 | + } |
| 392 | + seq->size = 0; |
| 393 | + |
| 394 | + while (1) { |
| 395 | + expr_ty lit = parse_literal(fmt, &pos, arena); |
| 396 | + if (lit) { |
| 397 | + asdl_seq_SET(seq, seq->size++, lit); |
| 398 | + } |
| 399 | + else if (PyErr_Occurred()) { |
| 400 | + return 0; |
| 401 | + } |
| 402 | + |
| 403 | + if (pos >= PyUnicode_GET_LENGTH(fmt)) { |
| 404 | + break; |
| 405 | + } |
| 406 | + if (cnt >= asdl_seq_LEN(elts)) { |
| 407 | + // More format units than items. |
| 408 | + return 1; |
| 409 | + } |
| 410 | + assert(PyUnicode_READ_CHAR(fmt, pos) == '%'); |
| 411 | + pos++; |
| 412 | + expr_ty expr = parse_format(fmt, &pos, asdl_seq_GET(elts, cnt), arena); |
| 413 | + cnt++; |
| 414 | + if (!expr) { |
| 415 | + return !PyErr_Occurred(); |
| 416 | + } |
| 417 | + asdl_seq_SET(seq, seq->size++, expr); |
| 418 | + } |
| 419 | + if (cnt < asdl_seq_LEN(elts)) { |
| 420 | + // More items than format units. |
| 421 | + return 1; |
| 422 | + } |
| 423 | + expr_ty res = _PyAST_JoinedStr(seq, |
| 424 | + node->lineno, node->col_offset, |
| 425 | + node->end_lineno, node->end_col_offset, |
| 426 | + arena); |
| 427 | + if (!res) { |
| 428 | + return 0; |
| 429 | + } |
| 430 | + COPY_NODE(node, res); |
| 431 | +// PySys_FormatStderr("format = %R\n", fmt); |
| 432 | + return 1; |
| 433 | +} |
| 434 | + |
227 | 435 | static int
|
228 | 436 | fold_binop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state)
|
229 | 437 | {
|
230 | 438 | expr_ty lhs, rhs;
|
231 | 439 | lhs = node->v.BinOp.left;
|
232 | 440 | rhs = node->v.BinOp.right;
|
233 |
| - if (lhs->kind != Constant_kind || rhs->kind != Constant_kind) { |
| 441 | + if (lhs->kind != Constant_kind) { |
234 | 442 | return 1;
|
235 | 443 | }
|
236 |
| - |
237 | 444 | PyObject *lv = lhs->v.Constant.value;
|
| 445 | + |
| 446 | + if (node->v.BinOp.op == Mod && |
| 447 | + rhs->kind == Tuple_kind && |
| 448 | + PyUnicode_Check(lv)) |
| 449 | + { |
| 450 | + return optimize_format(node, lv, rhs->v.Tuple.elts, arena); |
| 451 | + } |
| 452 | + |
| 453 | + if (rhs->kind != Constant_kind) { |
| 454 | + return 1; |
| 455 | + } |
| 456 | + |
238 | 457 | PyObject *rv = rhs->v.Constant.value;
|
239 | 458 | PyObject *newval = NULL;
|
240 | 459 |
|
|
0 commit comments