Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions Doc/library/ast.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,22 @@ Literals
function call).
* ``conversion`` is an integer:

* -1: no formatting
* 115: ``!s`` string formatting
* 114: ``!r`` repr formatting
* 97: ``!a`` ascii formatting
* ``-1``: no formatting
* ``ord('s')``: convert to :class:`str` before formatting (``!s``)
* ``ord('r')``: call :func:`repr` before formatting (``!r``)
* ``ord('a')``: call :func:`ascii` before formatting (``!a``)
* ``ord('d')``: convert to :class:`int` with truncation before formatting
* ``ord('i')``: call :func:`operator.index` before formatting
* ``ord('f')``: convert to :class:`float` before formatting

* ``format_spec`` is a :class:`JoinedStr` node representing the formatting
of the value, or ``None`` if no format was specified. Both
``conversion`` and ``format_spec`` can be set at the same time.

.. versionchanged:: 3.11
Added support for lossy and lossless conversions to :class:`int`
and :class:`float`.


.. class:: JoinedStr(values)

Expand Down
20 changes: 15 additions & 5 deletions Doc/library/dis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1227,21 +1227,31 @@ All of the following opcodes use their arguments.
an optional *fmt_spec* from the stack, then a required *value*.
*flags* is interpreted as follows:

* ``(flags & 0x03) == 0x00``: *value* is formatted as-is.
* ``(flags & 0x03) == 0x01``: call :func:`str` on *value* before
* ``(flags & 0x07) == 0x00``: *value* is formatted as-is.
* ``(flags & 0x07) == 0x01``: call :func:`str` on *value* before
formatting it.
* ``(flags & 0x03) == 0x02``: call :func:`repr` on *value* before
* ``(flags & 0x07) == 0x02``: call :func:`repr` on *value* before
formatting it.
* ``(flags & 0x03) == 0x03``: call :func:`ascii` on *value* before
* ``(flags & 0x07) == 0x03``: call :func:`ascii` on *value* before
formatting it.
* ``(flags & 0x04) == 0x04``: pop *fmt_spec* from the stack and use
* ``(flags & 0x07) == 0x04``: convert *value* to :class:`int` with
truncation before formatting it.
* ``(flags & 0x07) == 0x05``: call :func:`operator.index` on *value* before
formatting it.
* ``(flags & 0x07) == 0x06``: convert *value* to :class:`float` before
formatting it.
* ``(flags & 0x08) == 0x08``: pop *fmt_spec* from the stack and use
it, else use an empty *fmt_spec*.

Formatting is performed using :c:func:`PyObject_Format`. The
result is pushed on the stack.

.. versionadded:: 3.6

.. versionchanged:: 3.11
Added support for lossy and lossless conversions to :class:`int`
and :class:`float`.


.. opcode:: MATCH_CLASS (count)

Expand Down
3 changes: 1 addition & 2 deletions Doc/whatsnew/3.11.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ Optimizations
=============

* Compiler now optimizes simple C-style formatting with literal format
containing only format codes ``%s``, ``%r`` and ``%a`` and makes it as
fast as corresponding f-string expression.
string and makes it as fast as the corresponding f-string expression.
(Contributed by Serhiy Storchaka in :issue:`28307`.)

* "Zero-cost" exceptions are implemented. The cost of ``try`` statements is
Expand Down
9 changes: 6 additions & 3 deletions Include/ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,16 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
}

/* Masks and values used by FORMAT_VALUE opcode. */
#define FVC_MASK 0x3
#define FVC_MASK 0x7
#define FVC_NONE 0x0
#define FVC_STR 0x1
#define FVC_REPR 0x2
#define FVC_ASCII 0x3
#define FVS_MASK 0x4
#define FVS_HAVE_SPEC 0x4
#define FVC_INT 0x4
#define FVC_INDEX 0x5
#define FVC_FLOAT 0x6
#define FVS_MASK 0x8
#define FVS_HAVE_SPEC 0x8

#ifndef Py_LIMITED_API
# define Py_CPYTHON_CEVAL_H
Expand Down
8 changes: 6 additions & 2 deletions Lib/dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import types
import collections
import io
import operator

from opcode import *
from opcode import __all__ as _opcodes_all
Expand All @@ -22,6 +23,9 @@
(str, 'str'),
(repr, 'repr'),
(ascii, 'ascii'),
(int, 'int'),
(operator.index, 'index'),
(float, 'float'),
)
MAKE_FUNCTION = opmap['MAKE_FUNCTION']
MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')
Expand Down Expand Up @@ -383,8 +387,8 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasfree:
argval, argrepr = _get_name_info(arg, cells)
elif op == FORMAT_VALUE:
argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
argval = (argval, bool(arg & 0x4))
argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x7]
argval = (argval, bool(arg & 0x8))
if argval[1]:
if argrepr:
argrepr += ', '
Expand Down
4 changes: 3 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.10b1 3439 (Add ROT_N)
# Python 3.11a1 3450 Use exception table for unwinding ("zero cost" exception handling)
# Python 3.11a1 3451 (Add CALL_METHOD_KW)
# Python 3.11a1 3452 (Support int, index and float converter in
# FORMAT_VALUE #28307)

#
# MAGIC must change whenever the bytecode emitted by the compiler may no
Expand All @@ -363,7 +365,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3451).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3452).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

_PYCACHE = '__pycache__'
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,14 +340,14 @@ def _fstring(a, b, c, d):
4 LOAD_CONST 1 (' ')
6 LOAD_FAST 1 (b)
8 LOAD_CONST 2 ('4')
10 FORMAT_VALUE 4 (with format)
10 FORMAT_VALUE 8 (with format)
12 LOAD_CONST 1 (' ')
14 LOAD_FAST 2 (c)
16 FORMAT_VALUE 2 (repr)
18 LOAD_CONST 1 (' ')
20 LOAD_FAST 3 (d)
22 LOAD_CONST 2 ('4')
24 FORMAT_VALUE 6 (repr, with format)
24 FORMAT_VALUE 10 (repr, with format)
26 BUILD_STRING 7
28 RETURN_VALUE
""" % (_fstring.__code__.co_firstlineno + 1,)
Expand Down
10 changes: 6 additions & 4 deletions Lib/test/test_peepholer.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,13 +555,15 @@ def test_format_errors(self):
eval("'%s%z' % (x,)", {'x': 1234})
with self.assertRaisesRegex(ValueError, 'unsupported format character'):
eval("'%s%z' % (x, 5)", {'x': 1234})
with self.assertRaisesRegex(TypeError, 'a real number is required, not str'):
with self.assertRaisesRegex(TypeError, r'\breal number\b.*, not str'):
eval("'%d' % (x,)", {'x': '1234'})
with self.assertRaisesRegex(TypeError, 'an integer is required, not float'):
with self.assertRaisesRegex(TypeError,
r'\binteger\b.*, not float|\bfloat.* cannot .*\binteger\b'):
eval("'%x' % (x,)", {'x': 1234.56})
with self.assertRaisesRegex(TypeError, 'an integer is required, not str'):
with self.assertRaisesRegex(TypeError,
r'\binteger\b.*, not str|\bstr.* cannot .*\binteger\b'):
eval("'%x' % (x,)", {'x': '1234'})
with self.assertRaisesRegex(TypeError, 'must be real number, not str'):
with self.assertRaisesRegex(TypeError, r'\breal number\b.*, not str'):
eval("'%f' % (x,)", {'x': '1234'})
with self.assertRaisesRegex(TypeError,
'not enough arguments for format string'):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
Compiler now optimizes simple C-style formatting with literal format
containing only format codes %s, %r and %a by converting them to f-string
expressions.
string by converting them to f-string expressions.
105 changes: 78 additions & 27 deletions Python/ast_opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,44 +354,95 @@ simple_format_arg_parse(PyObject *fmt, Py_ssize_t *ppos,
static expr_ty
parse_format(PyObject *fmt, Py_ssize_t *ppos, expr_ty arg, PyArena *arena)
{
int spec, flags, width = -1, prec = -1;
int spec, flags, width = -1, prec = -1, conv, type, align = 0;
if (!simple_format_arg_parse(fmt, ppos, &spec, &flags, &width, &prec)) {
// Unsupported format.
return NULL;
}
if (spec == 's' || spec == 'r' || spec == 'a') {
char buf[1 + MAXDIGITS + 1 + MAXDIGITS + 1], *p = buf;
if (!(flags & F_LJUST) && width > 0) {
*p++ = '>';
}
if (width >= 0) {
p += snprintf(p, MAXDIGITS + 1, "%d", width);
}
if (prec >= 0) {
p += snprintf(p, MAXDIGITS + 2, ".%d", prec);
char buf[4 + MAXDIGITS + 1 + MAXDIGITS + 2], *p = buf;
align = (flags & F_LJUST) ? '<' : 0;
switch (spec) {
case 's': case 'r': case 'a': {
align = (flags & F_LJUST) ? 0 : '>';
flags &= F_LJUST;
conv = spec;
type = 0;
break;
}
expr_ty format_spec = NULL;
if (p != buf) {
PyObject *str = PyUnicode_FromString(buf);
if (str == NULL) {
return NULL;
}
if (_PyArena_AddPyObject(arena, str) < 0) {
Py_DECREF(str);
case 'd': case 'u': case 'i': {
if (prec >= 0) {
// Unsupported format.
return NULL;
}
format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena);
if (format_spec == NULL) {
conv = 'd';
type = 0;
break;
}
case 'o': case 'x': case 'X': {
if (prec >= 0) {
// Unsupported format.
return NULL;
}
conv = 'i';
type = spec;
break;
}
case 'f': case 'e': case 'g': case 'F': case 'E': case 'G': {
conv = 'f';
type = spec;
break;
}
default: {
// Unsupported format.
return NULL;
}
}

if (align && width > 0) {
*p++ = align;
}
if (flags & F_SIGN) {
*p++ = '+';
}
else if (flags & F_BLANK) {
*p++ = ' ';
}
if (flags & F_ALT) {
*p++ = '#';
}
if ((flags & F_ZERO) && !align && width > 0) {
*p++ = '0';
}

if (width >= 0) {
p += snprintf(p, MAXDIGITS + 1, "%d", width);
}
if (prec >= 0) {
p += snprintf(p, MAXDIGITS + 2, ".%d", prec);
}
if (type) {
*p++ = type;
}
*p = 0;
expr_ty format_spec = NULL;
if (p != buf) {
PyObject *str = PyUnicode_FromString(buf);
if (str == NULL) {
return NULL;
}
if (_PyArena_AddPyObject(arena, str) < 0) {
Py_DECREF(str);
return NULL;
}
format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena);
if (format_spec == NULL) {
return NULL;
}
return _PyAST_FormattedValue(arg, spec, format_spec,
arg->lineno, arg->col_offset,
arg->end_lineno, arg->end_col_offset,
arena);
}
// Unsupported format.
return NULL;
return _PyAST_FormattedValue(arg, conv, format_spec,
arg->lineno, arg->col_offset,
arg->end_lineno, arg->end_col_offset,
arena);
}

static int
Expand Down
21 changes: 18 additions & 3 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -4371,22 +4371,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
PyObject *fmt_spec;
PyObject *value;
PyObject *(*conv_fn)(PyObject *);
int which_conversion = oparg & FVC_MASK;
int conv = oparg & FVC_MASK;
int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;

fmt_spec = have_fmt_spec ? POP() : NULL;
value = POP();

/* See if any conversion is specified. */
switch (which_conversion) {
switch (conv) {
case FVC_NONE: conv_fn = NULL; break;
case FVC_STR: conv_fn = PyObject_Str; break;
case FVC_REPR: conv_fn = PyObject_Repr; break;
case FVC_ASCII: conv_fn = PyObject_ASCII; break;
case FVC_INDEX: conv_fn = PyNumber_Index; break;
case FVC_INT:
case FVC_FLOAT:
if (!PyNumber_Check(value)) {
_PyErr_Format(tstate, PyExc_TypeError,
"a real number is required, not %.200s",
Py_TYPE(value)->tp_name);
Py_DECREF(value);
Py_XDECREF(fmt_spec);
goto error;
}
conv_fn = (conv == FVC_INT) ? PyNumber_Long : PyNumber_Float;
break;
default:
_PyErr_Format(tstate, PyExc_SystemError,
"unexpected conversion flag %d",
which_conversion);
conv);
Py_DECREF(value);
Py_XDECREF(fmt_spec);
goto error;
}

Expand Down
20 changes: 13 additions & 7 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -4400,15 +4400,18 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
/* Our oparg encodes 2 pieces of information: the conversion
character, and whether or not a format_spec was provided.

Convert the conversion char to 3 bits:
: 000 0x0 FVC_NONE The default if nothing specified.
!s : 001 0x1 FVC_STR
!r : 010 0x2 FVC_REPR
!a : 011 0x3 FVC_ASCII
Convert the conversion char to 4 bits:
: 0000 0x0 FVC_NONE The default if nothing specified.
!s : 0001 0x1 FVC_STR
!r : 0010 0x2 FVC_REPR
!a : 0011 0x3 FVC_ASCII
!d : 0100 0x4 FVC_INT
!i : 0101 0x5 FVC_INDEX
!f : 0110 0x6 FVC_FLOAT

next bit is whether or not we have a format spec:
yes : 100 0x4
no : 000 0x0
yes : 1000 0x8
no : 0000 0x0
*/

int conversion = e->v.FormattedValue.conversion;
Expand All @@ -4421,6 +4424,9 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
case 's': oparg = FVC_STR; break;
case 'r': oparg = FVC_REPR; break;
case 'a': oparg = FVC_ASCII; break;
case 'd': oparg = FVC_INT; break;
case 'i': oparg = FVC_INDEX; break;
case 'f': oparg = FVC_FLOAT; break;
case -1: oparg = FVC_NONE; break;
default:
PyErr_Format(PyExc_SystemError,
Expand Down
Loading