python · serhiy-storchaka · May 16, 2021 · May 16, 2021 · May 18, 2021 · May 23, 2021
diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst
@@ -173,15 +173,22 @@ Literals
      function call).
    * ``conversion`` is an integer:
 
-     * -1: no formatting
-     * 115: ``!s`` string formatting
-     * 114: ``!r`` repr formatting
-     * 97: ``!a`` ascii formatting
+     * ``-1``: no formatting
+     * ``ord('s')``: convert to :class:`str` before formatting (``!s``)
+     * ``ord('r')``: call :func:`repr` before formatting (``!r``)
+     * ``ord('a')``: call :func:`ascii` before formatting (``!a``)
+     * ``ord('d')``: convert to :class:`int` with truncation before formatting
+     * ``ord('i')``: call :func:`operator.index` before formatting
+     * ``ord('f')``: convert to :class:`float` before formatting
 
    * ``format_spec`` is a :class:`JoinedStr` node representing the formatting
      of the value, or ``None`` if no format was specified. Both
      ``conversion`` and ``format_spec`` can be set at the same time.
 
+   .. versionchanged:: 3.11
+      Added support for lossy and lossless conversions to :class:`int`
+      and :class:`float`.
+
 
 .. class:: JoinedStr(values)
 

diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
@@ -1227,21 +1227,31 @@ All of the following opcodes use their arguments.
    an optional *fmt_spec* from the stack, then a required *value*.
    *flags* is interpreted as follows:
 
-   * ``(flags & 0x03) == 0x00``: *value* is formatted as-is.
-   * ``(flags & 0x03) == 0x01``: call :func:`str` on *value* before
+   * ``(flags & 0x07) == 0x00``: *value* is formatted as-is.
+   * ``(flags & 0x07) == 0x01``: call :func:`str` on *value* before
      formatting it.
-   * ``(flags & 0x03) == 0x02``: call :func:`repr` on *value* before
+   * ``(flags & 0x07) == 0x02``: call :func:`repr` on *value* before
      formatting it.
-   * ``(flags & 0x03) == 0x03``: call :func:`ascii` on *value* before
+   * ``(flags & 0x07) == 0x03``: call :func:`ascii` on *value* before
      formatting it.
-   * ``(flags & 0x04) == 0x04``: pop *fmt_spec* from the stack and use
+   * ``(flags & 0x07) == 0x04``: convert *value* to :class:`int` with
+     truncation before formatting it.
+   * ``(flags & 0x07) == 0x05``: call :func:`operator.index` on *value* before
+     formatting it.
+   * ``(flags & 0x07) == 0x06``: convert *value* to :class:`float` before
+     formatting it.
+   * ``(flags & 0x08) == 0x08``: pop *fmt_spec* from the stack and use
      it, else use an empty *fmt_spec*.
 
    Formatting is performed using :c:func:`PyObject_Format`.  The
    result is pushed on the stack.
 
    .. versionadded:: 3.6
 
+   .. versionchanged:: 3.11
+      Added support for lossy and lossless conversions to :class:`int`
+      and :class:`float`.
+
 
 .. opcode:: MATCH_CLASS (count)
 

@@ -91,8 +91,7 @@ Optimizations
 =============
 
 * Compiler now optimizes simple C-style formatting with literal format
-  containing only format codes ``%s``, ``%r`` and ``%a`` and makes it as
-  fast as corresponding f-string expression.
+  string and makes it as fast as the corresponding f-string expression.
   (Contributed by Serhiy Storchaka in :issue:`28307`.)
 
 * "Zero-cost" exceptions are implemented. The cost of ``try`` statements is

diff --git a/Include/ceval.h b/Include/ceval.h
@@ -138,13 +138,16 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
                  }
 
 /* Masks and values used by FORMAT_VALUE opcode. */
-#define FVC_MASK      0x3
+#define FVC_MASK      0x7
 #define FVC_NONE      0x0
 #define FVC_STR       0x1
 #define FVC_REPR      0x2
 #define FVC_ASCII     0x3
-#define FVS_MASK      0x4
-#define FVS_HAVE_SPEC 0x4
+#define FVC_INT       0x4
+#define FVC_INDEX     0x5
+#define FVC_FLOAT     0x6
+#define FVS_MASK      0x8
+#define FVS_HAVE_SPEC 0x8
 
 #ifndef Py_LIMITED_API
 #  define Py_CPYTHON_CEVAL_H

diff --git a/Lib/dis.py b/Lib/dis.py
@@ -4,6 +4,7 @@
 import types
 import collections
 import io
+import operator
 
 from opcode import *
 from opcode import __all__ as _opcodes_all
@@ -22,6 +23,9 @@
     (str, 'str'),
     (repr, 'repr'),
     (ascii, 'ascii'),
+    (int, 'int'),
+    (operator.index, 'index'),
+    (float, 'float'),
 )
 MAKE_FUNCTION = opmap['MAKE_FUNCTION']
 MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')
@@ -383,8 +387,8 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
             elif op in hasfree:
                 argval, argrepr = _get_name_info(arg, cells)
             elif op == FORMAT_VALUE:
-                argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
-                argval = (argval, bool(arg & 0x4))
+                argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x7]
+                argval = (argval, bool(arg & 0x8))
                 if argval[1]:
                     if argrepr:
                         argrepr += ', '

@@ -354,6 +354,8 @@ def _write_atomic(path, data, mode=0o666):
 #     Python 3.10b1 3439 (Add ROT_N)
 #     Python 3.11a1 3450 Use exception table for unwinding ("zero cost" exception handling)
 #     Python 3.11a1 3451 (Add CALL_METHOD_KW)
+#     Python 3.11a1 3452 (Support int, index and float converter in
+#                         FORMAT_VALUE #28307)
 
 #
 # MAGIC must change whenever the bytecode emitted by the compiler may no
@@ -363,7 +365,7 @@ def _write_atomic(path, data, mode=0o666):
 # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
 # in PC/launcher.c must also be updated.
 
-MAGIC_NUMBER = (3451).to_bytes(2, 'little') + b'\r\n'
+MAGIC_NUMBER = (3452).to_bytes(2, 'little') + b'\r\n'
 _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c
 
 _PYCACHE = '__pycache__'

diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
@@ -340,14 +340,14 @@ def _fstring(a, b, c, d):
               4 LOAD_CONST               1 (' ')
               6 LOAD_FAST                1 (b)
               8 LOAD_CONST               2 ('4')
-             10 FORMAT_VALUE             4 (with format)
+             10 FORMAT_VALUE             8 (with format)
              12 LOAD_CONST               1 (' ')
              14 LOAD_FAST                2 (c)
              16 FORMAT_VALUE             2 (repr)
              18 LOAD_CONST               1 (' ')
              20 LOAD_FAST                3 (d)
              22 LOAD_CONST               2 ('4')
-             24 FORMAT_VALUE             6 (repr, with format)
+             24 FORMAT_VALUE            10 (repr, with format)
              26 BUILD_STRING             7
              28 RETURN_VALUE
 """ % (_fstring.__code__.co_firstlineno + 1,)

diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
@@ -555,13 +555,15 @@ def test_format_errors(self):
             eval("'%s%z' % (x,)", {'x': 1234})
         with self.assertRaisesRegex(ValueError, 'unsupported format character'):
             eval("'%s%z' % (x, 5)", {'x': 1234})
-        with self.assertRaisesRegex(TypeError, 'a real number is required, not str'):
+        with self.assertRaisesRegex(TypeError, r'\breal number\b.*, not str'):
             eval("'%d' % (x,)", {'x': '1234'})
-        with self.assertRaisesRegex(TypeError, 'an integer is required, not float'):
+        with self.assertRaisesRegex(TypeError,
+                    r'\binteger\b.*, not float|\bfloat.* cannot .*\binteger\b'):
             eval("'%x' % (x,)", {'x': 1234.56})
-        with self.assertRaisesRegex(TypeError, 'an integer is required, not str'):
+        with self.assertRaisesRegex(TypeError,
+                    r'\binteger\b.*, not str|\bstr.* cannot .*\binteger\b'):
             eval("'%x' % (x,)", {'x': '1234'})
-        with self.assertRaisesRegex(TypeError, 'must be real number, not str'):
+        with self.assertRaisesRegex(TypeError, r'\breal number\b.*, not str'):
             eval("'%f' % (x,)", {'x': '1234'})
         with self.assertRaisesRegex(TypeError,
                     'not enough arguments for format string'):

diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-05-08-19-54-57.bpo-28307.7ysaVW.rst b/Misc/NEWS.d/next/Core and Builtins/2021-05-08-19-54-57.bpo-28307.7ysaVW.rst
@@ -1,3 +1,2 @@
 Compiler now optimizes simple C-style formatting with literal format
-containing only format codes %s, %r and %a by converting them to f-string
-expressions.
+string by converting them to f-string expressions.
diff --git a/Python/ast_opt.c b/Python/ast_opt.c
@@ -354,44 +354,95 @@ simple_format_arg_parse(PyObject *fmt, Py_ssize_t *ppos,
 static expr_ty
 parse_format(PyObject *fmt, Py_ssize_t *ppos, expr_ty arg, PyArena *arena)
 {
-    int spec, flags, width = -1, prec = -1;
+    int spec, flags, width = -1, prec = -1, conv, type, align = 0;
     if (!simple_format_arg_parse(fmt, ppos, &spec, &flags, &width, &prec)) {
         // Unsupported format.
         return NULL;
     }
-    if (spec == 's' || spec == 'r' || spec == 'a') {
-        char buf[1 + MAXDIGITS + 1 + MAXDIGITS + 1], *p = buf;
-        if (!(flags & F_LJUST) && width > 0) {
-            *p++ = '>';
-        }
-        if (width >= 0) {
-            p += snprintf(p, MAXDIGITS + 1, "%d", width);
-        }
-        if (prec >= 0) {
-            p += snprintf(p, MAXDIGITS + 2, ".%d", prec);
+    char buf[4 + MAXDIGITS + 1 + MAXDIGITS + 2], *p = buf;
+    align = (flags & F_LJUST) ? '<' : 0;
+    switch (spec) {
+        case 's': case 'r': case 'a': {
+            align = (flags & F_LJUST) ? 0 : '>';
+            flags &= F_LJUST;
+            conv = spec;
+            type = 0;
+            break;
         }
-        expr_ty format_spec = NULL;
-        if (p != buf) {
-            PyObject *str = PyUnicode_FromString(buf);
-            if (str == NULL) {
-                return NULL;
-            }
-            if (_PyArena_AddPyObject(arena, str) < 0) {
-                Py_DECREF(str);
+        case 'd': case 'u': case 'i': {
+            if (prec >= 0) {
+                // Unsupported format.
                 return NULL;
             }
-            format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena);
-            if (format_spec == NULL) {
+            conv = 'd';
+            type = 0;
+            break;
+        }
+        case 'o': case 'x': case 'X': {
+            if (prec >= 0) {
+                // Unsupported format.
                 return NULL;
             }
+            conv = 'i';
+            type = spec;
+            break;
+        }
+        case 'f': case 'e': case 'g': case 'F': case 'E': case 'G': {
+            conv = 'f';
+            type = spec;
+            break;
+        }
+        default: {
+            // Unsupported format.
+            return NULL;
+        }
+    }
+
+    if (align && width > 0) {
+        *p++ = align;
+    }
+    if (flags & F_SIGN) {
+        *p++ = '+';
+    }
+    else if (flags & F_BLANK) {
+        *p++ = ' ';
+    }
+    if (flags & F_ALT) {
+        *p++ = '#';
+    }
+    if ((flags & F_ZERO) && !align && width > 0) {
+        *p++ = '0';
+    }
+
+    if (width >= 0) {
+        p += snprintf(p, MAXDIGITS + 1, "%d", width);
+    }
+    if (prec >= 0) {
+        p += snprintf(p, MAXDIGITS + 2, ".%d", prec);
+    }
+    if (type) {
+        *p++ = type;
+    }
+    *p = 0;
+    expr_ty format_spec = NULL;
+    if (p != buf) {
+        PyObject *str = PyUnicode_FromString(buf);
+        if (str == NULL) {
+            return NULL;
+        }
+        if (_PyArena_AddPyObject(arena, str) < 0) {
+            Py_DECREF(str);
+            return NULL;
+        }
+        format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena);
+        if (format_spec == NULL) {
+            return NULL;
         }
-        return _PyAST_FormattedValue(arg, spec, format_spec,
-                                     arg->lineno, arg->col_offset,
-                                     arg->end_lineno, arg->end_col_offset,
-                                     arena);
     }
-    // Unsupported format.
-    return NULL;
+    return _PyAST_FormattedValue(arg, conv, format_spec,
+                                 arg->lineno, arg->col_offset,
+                                 arg->end_lineno, arg->end_col_offset,
+                                 arena);
 }
 
 static int

@@ -4371,22 +4371,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
             PyObject *fmt_spec;
             PyObject *value;
             PyObject *(*conv_fn)(PyObject *);
-            int which_conversion = oparg & FVC_MASK;
+            int conv = oparg & FVC_MASK;
             int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;
 
             fmt_spec = have_fmt_spec ? POP() : NULL;
             value = POP();
 
             /* See if any conversion is specified. */
-            switch (which_conversion) {
+            switch (conv) {
             case FVC_NONE:  conv_fn = NULL;           break;
             case FVC_STR:   conv_fn = PyObject_Str;   break;
             case FVC_REPR:  conv_fn = PyObject_Repr;  break;
             case FVC_ASCII: conv_fn = PyObject_ASCII; break;
+            case FVC_INDEX: conv_fn = PyNumber_Index; break;
+            case FVC_INT:
+            case FVC_FLOAT:
+                if (!PyNumber_Check(value)) {
+                    _PyErr_Format(tstate, PyExc_TypeError,
+                                  "a real number is required, not %.200s",
+                                  Py_TYPE(value)->tp_name);
+                    Py_DECREF(value);
+                    Py_XDECREF(fmt_spec);
+                    goto error;
+                }
+                conv_fn = (conv == FVC_INT) ? PyNumber_Long : PyNumber_Float;
+                break;
             default:
                 _PyErr_Format(tstate, PyExc_SystemError,
                               "unexpected conversion flag %d",
-                              which_conversion);
+                              conv);
+                Py_DECREF(value);
+                Py_XDECREF(fmt_spec);
                 goto error;
             }
 

@@ -4400,15 +4400,18 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
     /* Our oparg encodes 2 pieces of information: the conversion
        character, and whether or not a format_spec was provided.
 
-       Convert the conversion char to 3 bits:
-           : 000  0x0  FVC_NONE   The default if nothing specified.
-       !s  : 001  0x1  FVC_STR
-       !r  : 010  0x2  FVC_REPR
-       !a  : 011  0x3  FVC_ASCII
+       Convert the conversion char to 4 bits:
+           : 0000  0x0  FVC_NONE   The default if nothing specified.
+       !s  : 0001  0x1  FVC_STR
+       !r  : 0010  0x2  FVC_REPR
+       !a  : 0011  0x3  FVC_ASCII
+       !d  : 0100  0x4  FVC_INT
+       !i  : 0101  0x5  FVC_INDEX
+       !f  : 0110  0x6  FVC_FLOAT
 
        next bit is whether or not we have a format spec:
-       yes : 100  0x4
-       no  : 000  0x0
+       yes : 1000  0x8
+       no  : 0000  0x0
     */
 
     int conversion = e->v.FormattedValue.conversion;
@@ -4421,6 +4424,9 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
     case 's': oparg = FVC_STR;   break;
     case 'r': oparg = FVC_REPR;  break;
     case 'a': oparg = FVC_ASCII; break;
+    case 'd': oparg = FVC_INT; break;
+    case 'i': oparg = FVC_INDEX; break;
+    case 'f': oparg = FVC_FLOAT; break;
     case -1:  oparg = FVC_NONE;  break;
     default:
         PyErr_Format(PyExc_SystemError,