diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index 23f34a3de0f05cd..4dcb13ddfd6e69f 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -788,6 +788,14 @@ section.
The long type is described in :ref:`typesnumeric`.
+ .. versionchanged:: 2.7.18.6
+ :class:`long` string inputs and string representations can be limited to
+ help avoid denial of service attacks. A :exc:`ValueError` is raised when
+ the limit is exceeded while converting a string *x* to a :class:`long` or
+ when converting a :class:`long` into a string would exceed the limit.
+ See the :ref:`integer string conversion length limitation
+ <int_max_str_digits>` documentation.
+
.. function:: map(function, iterable, ...)
diff --git a/Doc/library/json.rst b/Doc/library/json.rst
index 87dc0541dc7f1c8..29867ff33201b72 100644
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -14,6 +14,11 @@ is a lightweight data interchange format inspired by
`JavaScript `_ object literal syntax
(although it is not a strict subset of JavaScript [#rfc-errata]_ ).
+.. warning::
+ Be cautious when parsing JSON data from untrusted sources. A malicious
+ JSON string may cause the decoder to consume considerable CPU and memory
+ resources. Limiting the size of data to be parsed is recommended.
+
:mod:`json` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules.
@@ -249,6 +254,12 @@ Basic Usage
be used to use another datatype or parser for JSON integers
(e.g. :class:`float`).
+ .. versionchanged:: 2.7.18.6
+ The default *parse_int* of :func:`int` now limits the maximum length of
+ the integer string via the interpreter's :ref:`integer string
+ conversion length limitation <int_max_str_digits>` to help avoid denial
+ of service attacks.
+
*parse_constant*, if specified, will be called with one of the following
strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.
This can be used to raise an exception if invalid JSON numbers
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index b4fe19a5f0818a7..91b560693f9e42e 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -521,6 +521,13 @@ class`. float also has the following additional methods.
.. versionadded:: 2.6
+ .. note::
+
+ The values returned by ``as_integer_ratio()`` can be huge. Attempts
+ to render such integers into decimal strings may bump into the
+ :ref:`integer string conversion length limitation
+ <int_max_str_digits>`.
+
.. method:: float.is_integer()
Return ``True`` if the float instance is finite with integral
@@ -3190,6 +3197,167 @@ The following attributes are only supported by :term:`new-style class`\ es.
[]
+.. _int_max_str_digits:
+
+Integer string conversion length limitation
+===========================================
+
+CPython has a global limit for converting between :class:`long` and :class:`str`
+or :class:`unicode` to mitigate denial of service attacks. This limit *only* applies
+to decimal or other non-power-of-two number bases. Hexadecimal, octal, and binary
+conversions are unlimited. The limit can be configured.
+
+The :class:`long` type in CPython is an arbitrary length number stored in binary
+form (commonly known as a "bignum"). There exists no algorithm that can convert
+a string to a binary integer or a binary integer to a string in linear time,
+*unless* the base is a power of 2. Even the best known algorithms for base 10
+have sub-quadratic complexity. Converting a large value such as ``long('1' *
+500000)`` can take over a second on a fast CPU.
+
+Limiting conversion size offers a practical way to avoid `CVE-2020-10735
+<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_.
+
+The limit is applied to the number of digit characters in the input or output
+string when a non-linear conversion algorithm would be involved. A trailing *L*
+and the sign are not counted towards the limit.
+
+When an operation would exceed the limit, a :exc:`ValueError` is raised:
+
+.. doctest::
+
+ >>> import sys
+ >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default.
+ >>> _ = long('2' * 5432)
+ Traceback (most recent call last):
+ ...
+ ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits
+ >>> i = long('2' * 4300)
+ >>> len(str(i))
+ 4300
+ >>> i_squared = i*i
+ >>> len(str(i_squared))
+ Traceback (most recent call last):
+ ...
+ ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits
+ >>> len(hex(i_squared))
+ 7145
+ >>> assert long(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited.
+
+The default limit is 4300 digits as provided in
+:data:`sys.long_info.default_max_str_digits <sys.long_info>`.
+The lowest limit that can be configured is 640 digits as provided in
+:data:`sys.long_info.str_digits_check_threshold <sys.long_info>`.
+
+Verification:
+
+.. doctest::
+
+ >>> import sys
+ >>> assert sys.long_info.default_max_str_digits == 4300, sys.long_info
+ >>> assert sys.long_info.str_digits_check_threshold == 640, sys.long_info
+ >>> msg = long('578966293710682886880994035146873798396722250538762761564'
+ ... '9252925514383915483333812743580549779436104706260696366600'
+ ... '571186405732')
+ ...
+
+.. versionadded:: 2.7.18.6
+
+
+Affected APIs
+-------------
+
+Because int automatically converts to long if the value is larger than
+:data:`sys.maxint` this limitation applies to potentially slow conversions
+between any of :class:`int` or :class:`long` and :class:`str` or :class:`unicode`:
+
+* ``int(string)`` with default base 10.
+* ``int(string, base)`` for all bases that are not a power of 2.
+* ``long(string)`` with default base 10.
+* ``long(string, base)`` for all bases that are not a power of 2.
+* ``int(unicode)`` with default base 10.
+* ``int(unicode, base)`` for all bases that are not a power of 2.
+* ``long(unicode)`` with default base 10.
+* ``long(unicode, base)`` for all bases that are not a power of 2.
+* ``str(long)``.
+* ``repr(long)``.
+* ``unicode(long)``.
+* any other string conversion to base 10, for example ``"{}".format(long)``.
+
+The limitations do not apply to functions with a linear algorithm:
+
+* ``long(string, base)`` with base 2, 4, 8, 16, or 32.
+* :func:`hex`, :func:`oct`, :func:`bin`.
+* :ref:`formatspec` for hex, octal, and binary numbers.
+* :class:`str` to :class:`float`.
+* :class:`str` to :class:`decimal.Decimal`.
+
+Configuring the limit
+---------------------
+
+Before Python starts up you can use an environment variable to configure the limit:
+
+* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g.
+ ``PYTHONINTMAXSTRDIGITS=640 python`` to set the limit to 640 or
+ ``PYTHONINTMAXSTRDIGITS=0 python`` to disable the limitation.
+* :data:`sys.flags.int_max_str_digits` contains the value of
+ :envvar:`PYTHONINTMAXSTRDIGITS`. A value of *-1* indicates that none was set,
+ thus a value of :data:`sys.long_info.default_max_str_digits` was used during
+ initialization.
+
+From code, you can inspect the current limit and set a new one using these
+:mod:`sys` APIs:
+
+* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are
+ a getter and setter for the interpreter-wide limit.
+
+Information about the default and minimum can be found in :attr:`sys.long_info`:
+
+* :data:`sys.long_info.default_max_str_digits <sys.long_info>` is the compiled-in
+ default limit.
+* :data:`sys.long_info.str_digits_check_threshold <sys.long_info>` is the lowest
+ accepted value for the limit (other than 0 which disables it).
+
+.. versionadded:: 2.7.18.6
+
+.. caution::
+
+ Setting a low limit *can* lead to problems. While rare, code exists that
+ contains integer constants in decimal in their source that exceed the
+ minimum threshold. A consequence of setting the limit is that Python source
+ code containing decimal integer literals longer than the limit will
+ encounter an error during parsing, usually at startup time or import time or
+ even at installation time - anytime an up to date ``.pyc`` does not already
+ exist for the code. A workaround for source that contains such large
+ constants is to convert them to ``0x`` hexadecimal form as it has no limit.
+
+ Test your application thoroughly if you use a low limit. Ensure your tests
+ run with the limit set early via the environment so that it applies during
+ startup and even during any installation step that may invoke Python to
+ precompile ``.py`` sources to ``.pyc`` files.
+
+Recommended configuration
+-------------------------
+
+The default :data:`sys.long_info.default_max_str_digits` is expected to be
+reasonable for most applications. If your application requires a different
+limit, set it from your main entry point using Python version agnostic code as
+these APIs were ported from the original fix in version 3.12.
+
+Example::
+
+ >>> import sys
+ >>> if hasattr(sys, "set_int_max_str_digits"):
+ ... upper_bound = 68000
+ ... lower_bound = 4004
+ ... current_limit = sys.get_int_max_str_digits()
+ ... if current_limit == 0 or current_limit > upper_bound:
+ ... sys.set_int_max_str_digits(upper_bound)
+ ... elif current_limit < lower_bound:
+ ... sys.set_int_max_str_digits(lower_bound)
+
+If you need to disable it entirely, set it to ``0``.
+
+
.. rubric:: Footnotes
.. [1] Additional information on these special methods may be found in the Python
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index 339625ad38021ff..3f3b2d393d9c781 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -431,6 +431,14 @@ always available.
an argument to :func:`getrefcount`.
+.. function:: get_int_max_str_digits()
+
+ Returns the current value for the :ref:`integer string conversion length
+ limitation <int_max_str_digits>`. See also :func:`set_int_max_str_digits`.
+
+ .. versionadded:: 2.7.18.6
+
+
.. function:: getrecursionlimit()
Return the current value of the recursion limit, the maximum depth of the Python
@@ -603,19 +611,30 @@ always available.
.. tabularcolumns:: |l|L|
- +-------------------------+----------------------------------------------+
- | Attribute | Explanation |
- +=========================+==============================================+
- | :const:`bits_per_digit` | number of bits held in each digit. Python |
- | | integers are stored internally in base |
- | | ``2**long_info.bits_per_digit`` |
- +-------------------------+----------------------------------------------+
- | :const:`sizeof_digit` | size in bytes of the C type used to |
- | | represent a digit |
- +-------------------------+----------------------------------------------+
+ +----------------------------------------+-----------------------------------------------+
+ | Attribute | Explanation |
+ +========================================+===============================================+
+ | :const:`bits_per_digit` | number of bits held in each digit. Python |
+ | | integers are stored internally in base |
+ | | ``2**long_info.bits_per_digit`` |
+ +----------------------------------------+-----------------------------------------------+
+ | :const:`sizeof_digit` | size in bytes of the C type used to |
+ | | represent a digit |
+ +----------------------------------------+-----------------------------------------------+
+ | :const:`default_max_str_digits` | default value for |
+ | | :func:`sys.get_int_max_str_digits` when it |
+ | | is not otherwise explicitly configured. |
+ +----------------------------------------+-----------------------------------------------+
+ | :const:`str_digits_check_threshold` | minimum non-zero value for |
+ | | :func:`sys.set_int_max_str_digits`, |
+ | | :envvar:`PYTHONINTMAXSTRDIGITS`. |
+ +----------------------------------------+-----------------------------------------------+
.. versionadded:: 2.7
+ .. versionchanged:: 2.7.18.6
+ Added ``default_max_str_digits`` and ``str_digits_check_threshold``.
+
.. data:: last_type
last_value
@@ -848,6 +867,15 @@ always available.
.. versionadded:: 2.2
+.. function:: set_int_max_str_digits(n)
+
+ Set the :ref:`integer string conversion length limitation
+ <int_max_str_digits>` used by this interpreter. See also
+ :func:`get_int_max_str_digits`.
+
+ .. versionadded:: 2.7.18.6
+
+
.. function:: setprofile(profilefunc)
.. index::
diff --git a/Doc/library/test.rst b/Doc/library/test.rst
index 9d78c90f55d54d3..1b149673d4168c6 100644
--- a/Doc/library/test.rst
+++ b/Doc/library/test.rst
@@ -443,6 +443,16 @@ The :mod:`test.support` module defines the following functions:
.. versionadded:: 2.7
+.. function:: adjust_int_max_str_digits(max_digits)
+
+ This function returns a context manager that will change the global
+ :func:`sys.set_int_max_str_digits` setting for the duration of the
+ context to allow execution of test code that needs a different limit
+ on the number of digits when converting between an integer and string.
+
+ .. versionadded:: 2.7.18.6
+
+
The :mod:`test.support` module defines the following classes:
.. class:: TransientResource(exc[, **kwargs])
diff --git a/Doc/library/xmlrpclib.rst b/Doc/library/xmlrpclib.rst
index e818c3de6ab1a66..9f8db27c7724e93 100644
--- a/Doc/library/xmlrpclib.rst
+++ b/Doc/library/xmlrpclib.rst
@@ -557,6 +557,12 @@ Convenience Functions
.. versionchanged:: 2.5
The *use_datetime* flag was added.
+ .. versionchanged:: 2.7.18.6
+ Parsing of integer values is now subject to the maximum length of
+ the integer string set by the interpreter's :ref:`integer string
+ conversion length limitation <int_max_str_digits>` to help avoid denial
+ of service attacks.
+
.. _xmlrpc-client-example:
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index c27ec4f3e634a5c..a55c64ee6071580 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -638,6 +638,15 @@ conflict.
.. versionadded:: 2.7.12
+
+.. envvar:: PYTHONINTMAXSTRDIGITS
+
+ If this variable is set to an integer, it is used to configure the
+ interpreter's global :ref:`integer string conversion length limitation
+ <int_max_str_digits>`.
+
+ .. versionadded:: 2.7.18.6
+
Debug-mode variables
~~~~~~~~~~~~~~~~~~~~
diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst
index 992658e8bab83e3..b2c96361afde0ee 100644
--- a/Doc/whatsnew/2.7.rst
+++ b/Doc/whatsnew/2.7.rst
@@ -884,6 +884,18 @@ Some smaller changes made to the core Python language are:
now only cleared if no one else is holding a reference to the
dictionary (:issue:`7140`).
+* Converting between :class:`int` or :class:`long` and :class:`str` or
+ :class:`unicode` in bases other than 2 (binary), 4, 8 (octal), 16
+ (hexadecimal), or 32 such as base 10 (decimal) now raises a
+ :exc:`ValueError` if the number of digits in string form is above a
+ limit to avoid potential denial of service attacks due to the
+ algorithmic complexity. This is a mitigation for `CVE-2020-10735
+ <https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_.
+ This limit can be configured or disabled by environment variable or
+ :mod:`sys` APIs. See the :ref:`integer string conversion length
+ limitation <int_max_str_digits>` documentation. The default limit
+ is 4300 digits in string form.
+
.. ======================================================================
.. _new-27-interpreter:
diff --git a/Include/longobject.h b/Include/longobject.h
index 4e33ff2bb17ffd4..ffda440e819cfb0 100644
--- a/Include/longobject.h
+++ b/Include/longobject.h
@@ -129,6 +129,32 @@ PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj,
char *format_spec,
Py_ssize_t format_spec_len);
+#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
+/*
+ * Default long base conversion size limitation: Denial of Service prevention.
+ *
+ * Chosen such that this isn't wildly slow on modern hardware
+ * 4300 decimal digits fits a ~14284 bit number.
+ */
+#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300
+/*
+ * Threshold for max digits check. For performance reasons long() and
+ * long.__str__() don't check values that are smaller than this
+ * threshold. Acts as a guaranteed minimum size limit for bignums that
+ * applications can expect from CPython.
+ *
+ * "640 digits should be enough for anyone." - gps
+ * fits a ~2126 bit decimal number.
+ */
+#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640
+
+#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \
+ (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD))
+# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold."
+#endif
+
+int Py_LongMaxStrDigits;
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/pydebug.h b/Include/pydebug.h
index 0f45960f90f5110..d655e3e219ee844 100644
--- a/Include/pydebug.h
+++ b/Include/pydebug.h
@@ -27,6 +27,7 @@ PyAPI_DATA(int) _Py_QnewFlag;
/* Warn about 3.x issues */
PyAPI_DATA(int) Py_Py3kWarningFlag;
PyAPI_DATA(int) Py_HashRandomizationFlag;
+PyAPI_DATA(int) Py_LongMaxStrDigitsFlag;
/* this is a wrapper around getenv() that pays attention to
Py_IgnoreEnvironmentFlag. It should be used for getting variables like
diff --git a/Include/pystate.h b/Include/pystate.h
index f2cfc30208f5ef6..c347759c5ff5adf 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -34,6 +34,8 @@ typedef struct _is {
int tscdump;
#endif
+ int long_max_str_digits;
+
} PyInterpreterState;
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index f0f4e382e5ec836..7950d94a0852885 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -128,6 +128,7 @@ PyAPI_FUNC(int) _PyLong_Init(void);
PyAPI_FUNC(void) _PyFloat_Init(void);
PyAPI_FUNC(int) PyByteArray_Init(void);
PyAPI_FUNC(void) _PyRandom_Init(void);
+PyAPI_FUNC(void) _PyLongMaxStrDigits_Init(void);
/* Various internal finalizers */
PyAPI_FUNC(void) _PyExc_Fini(void);
diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py
index 0014546b8284b3e..701aaf1672dfb50 100644
--- a/Lib/json/tests/test_decode.py
+++ b/Lib/json/tests/test_decode.py
@@ -2,6 +2,7 @@
from StringIO import StringIO
from collections import OrderedDict
from json.tests import PyTest, CTest
+from test import test_support
class TestDecode(object):
@@ -65,5 +66,12 @@ def test_negative_index(self):
self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000)
self.assertRaises(ValueError, d.raw_decode, u'a'*42, -50000)
+ def test_limit_int(self):
+ maxdigits = 5000
+ with test_support.adjust_int_max_str_digits(maxdigits):
+ self.loads('1' * maxdigits)
+ with self.assertRaises(ValueError):
+ self.loads('1' * (maxdigits + 1))
+
class TestPyDecode(TestDecode, PyTest): pass
class TestCDecode(TestDecode, CTest): pass
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index ccc11c1b4b0a81a..00b066d3ff1ca7f 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -45,7 +45,7 @@
"check_impl_detail", "get_attribute", "py3k_bytes",
"import_fresh_module", "threading_cleanup", "reap_children",
"strip_python_stderr", "IPV6_ENABLED", "run_with_tz",
- "SuppressCrashReport"]
+ "SuppressCrashReport", "adjust_int_max_str_digits"]
class Error(Exception):
"""Base class for regression test exceptions."""
@@ -2175,3 +2175,14 @@ def save(self):
def restore(self):
for signum, handler in self.handlers.items():
self.signal.signal(signum, handler)
+
+
+@contextlib.contextmanager
+def adjust_int_max_str_digits(max_digits):
+ """Temporarily change the integer string conversion length limit."""
+ current = sys.get_int_max_str_digits()
+ try:
+ sys.set_int_max_str_digits(max_digits)
+ yield
+ finally:
+ sys.set_int_max_str_digits(current)
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index 3cfe6188ac13422..b9a657ac6904618 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -492,6 +492,13 @@ def test_literal_eval_issue4907(self):
self.assertEqual(ast.literal_eval('1.5 - 2j'), 1.5 - 2j)
self.assertRaises(ValueError, ast.literal_eval, '2 + (3 + 4j)')
+ def test_literal_eval_str_int_limit(self):
+ with test_support.adjust_int_max_str_digits(4000):
+ ast.literal_eval('3'*4000)
+ with self.assertRaises(ValueError) as err_ctx:
+ ast.literal_eval('3'*4001)
+ self.assertIn('Exceeds the limit ', str(err_ctx.exception))
+
def test_main():
with test_support.check_py3k_warnings(("backquote not supported",
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 5396838f3d26be1..74d0b8763a7b5ef 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -63,6 +63,15 @@ def __iter__(self):
class BuiltinTest(unittest.TestCase):
+ def setUp(self):
+ super(BuiltinTest, self).setUp()
+ self._previous_int_limit = sys.get_int_max_str_digits()
+ sys.set_int_max_str_digits(7000)
+
+ def tearDown(self):
+ sys.set_int_max_str_digits(self._previous_int_limit)
+ super(BuiltinTest, self).tearDown()
+
def test_import(self):
__import__('sys')
__import__('time')
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 12f26d9e4506f19..0e31e60a3dbd067 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -134,6 +134,30 @@ def test_unknown_options(self):
self.assertEqual(err.splitlines().count(b'Unknown option: -a'), 1)
self.assertEqual(b'', out)
+ def test_int_max_str_digits(self):
+ code = 'import sys; print sys.flags.int_max_str_digits, sys.get_int_max_str_digits()'
+
+ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo')
+ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100')
+ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='-1')
+
+ def parse(res):
+ return tuple(int(r) for r in res[1].strip().split())
+
+ res = assert_python_ok('-c', code)
+ self.assertEqual(parse(res), (-1, sys.get_int_max_str_digits()))
+ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0')
+ self.assertEqual(parse(res), (0, 0))
+ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000')
+ self.assertEqual(parse(res), (4000, 4000))
+ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='100000')
+ self.assertEqual(parse(res), (100000, 100000))
+
+ res = assert_python_ok('-E', '-c', code, PYTHONINTMAXSTRDIGITS='0')
+ self.assertEqual(parse(res), (-1, sys.get_int_max_str_digits()))
+ res = assert_python_ok('-E', '-c', code, PYTHONINTMAXSTRDIGITS='4000')
+ self.assertEqual(parse(res), (-1, sys.get_int_max_str_digits()))
+
def test_main():
test.test_support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 15a00f3cf7fcfe8..3a2e84ee52075ea 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -291,6 +291,17 @@ def test_literals_with_leading_zeroes(self):
self.assertEqual(eval("01000000000000000000000.0"),
1000000000000000000000.0)
+ def test_int_literals_too_long(self):
+ n = 3000
+ bign = '3'*n
+ source = 'a = 1\nb = 2\nc = {bign}\nd = 4'.format(bign=bign)
+ with test_support.adjust_int_max_str_digits(n):
+ compile(source, '', 'exec')
+ with test_support.adjust_int_max_str_digits(n-1):
+ with self.assertRaises(ValueError) as err_ctx:
+ compile(source, '', 'exec')
+ self.assertIn('Exceeds the limit ', str(err_ctx.exception))
+
def test_unary_minus(self):
# Verify treatment of unary minus on negative numbers SF bug #660455
if sys.maxint == 2147483647:
diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py
index ea5c0e3f48d5bbf..0de7f0c25f077ae 100644
--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -495,8 +495,142 @@ def __trunc__(self):
self.assertIs(type(n), IntSubclass)
+class IntStrDigitLimitsTests(unittest.TestCase):
+
+ int_class = int # Override this in subclasses to reuse the suite.
+
+ def setUp(self):
+ super(IntStrDigitLimitsTests, self).setUp()
+ self._previous_limit = sys.get_int_max_str_digits()
+ sys.set_int_max_str_digits(2048)
+
+ def tearDown(self):
+ sys.set_int_max_str_digits(self._previous_limit)
+ super(IntStrDigitLimitsTests, self).tearDown()
+
+ def stringify(self, i, will_error=False):
+ if not will_error:
+ str(i)
+ # repr will also create a string, but append 'L' if i was long
+ repr(i)
+ unicode(i)
+ return
+ with self.assertRaises(ValueError):
+ str(i)
+ with self.assertRaises(ValueError):
+ repr(i)
+ with self.assertRaises(ValueError):
+ unicode(i)
+
+ def test_disabled_limit(self):
+ self.assertGreater(sys.get_int_max_str_digits(), 0)
+ self.assertLess(sys.get_int_max_str_digits(), 20000)
+ with test_support.adjust_int_max_str_digits(0):
+ self.assertEqual(sys.get_int_max_str_digits(), 0)
+ i = self.int_class('1' * 20000)
+ self.stringify(i)
+ self.assertGreater(sys.get_int_max_str_digits(), 0)
+
+ def test_max_str_digits_edge_cases(self):
+ """Ignore the +/- sign 'L' and space padding."""
+ int_class = self.int_class
+ maxdigits = sys.get_int_max_str_digits()
+
+ int_class('1' * maxdigits)
+ int_class(' ' + '1' * maxdigits)
+ int_class('1' * maxdigits + ' ')
+ int_class('+' + '1' * maxdigits)
+ int_class('-' + '1' * maxdigits)
+ self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits)
+
+ int_class(u'1' * maxdigits)
+ int_class(u' ' + u'1' * maxdigits)
+ int_class(u'1' * maxdigits + u' ')
+ int_class(u'+' + u'1' * maxdigits)
+ int_class(u'-' + u'1' * maxdigits)
+ self.assertEqual(len(unicode(10 ** (maxdigits - 1))), maxdigits)
+
+ def check(self, i, base=None):
+ with self.assertRaises(ValueError):
+ if base is None:
+ self.int_class(i)
+ else:
+ self.int_class(i, base)
+
+ def test_max_str_digits(self):
+ maxdigits = sys.get_int_max_str_digits()
+
+ self.check('1' * (maxdigits + 1))
+ self.check(' ' + '1' * (maxdigits + 1))
+ self.check('1' * (maxdigits + 1) + ' ')
+ self.check('+' + '1' * (maxdigits + 1))
+ self.check('-' + '1' * (maxdigits + 1))
+ self.check('1' * (maxdigits + 1))
+
+ self.check(u'1' * (maxdigits + 1))
+ self.check(u' ' + u'1' * (maxdigits + 1))
+ self.check(u'1' * (maxdigits + 1) + u' ')
+ self.check(u'+' + u'1' * (maxdigits + 1))
+ self.check(u'-' + u'1' * (maxdigits + 1))
+ self.check(u'1' * (maxdigits + 1))
+
+ i = 10 ** maxdigits
+ self.stringify(i, will_error=True)
+
+ def test_changed_limit(self):
+ int_class = self.int_class
+ newmax = 900
+ self.assertLess(sys.long_info.str_digits_check_threshold, newmax)
+ with test_support.adjust_int_max_str_digits(newmax):
+ i = 10 ** (newmax - 1)
+ self.stringify(i)
+ int_class('1' * newmax)
+
+ i = 10 ** newmax
+ self.stringify(i, will_error=True)
+ self.check('1' * (newmax + 1))
+
+ def test_power_of_two_bases_unlimited(self):
+ """The limit does not apply to power of 2 bases."""
+ maxdigits = sys.get_int_max_str_digits()
+
+ for base in (2, 4, 8, 16, 32):
+ self.int_class('1' * (maxdigits + 1), base)
+ assert maxdigits < 100000
+ self.int_class('1' * 100000, base)
+
+ def test_sign_not_counted(self):
+ int_class = self.int_class
+ maxdigits = sys.get_int_max_str_digits()
+ s = '5' * maxdigits
+ i = int_class(s)
+ pos_i = int_class('+{s}'.format(s=s))
+ assert i == pos_i
+ neg_i = int_class('-{s}'.format(s=s))
+ assert -pos_i == neg_i
+ self.stringify(pos_i)
+ self.stringify(neg_i)
+
+ def _other_base_helper(self, base):
+ int_class = self.int_class
+ maxdigits = sys.get_int_max_str_digits()
+ s = '2' * maxdigits
+ i = int_class(s, base)
+ if base > 10:
+ self.stringify(i, will_error=True)
+ elif base < 10:
+ self.stringify(i)
+ with self.assertRaises(ValueError) as err:
+ int_class('{s}1'.format(s=s), base)
+
+ def test_int_from_other_bases(self):
+ self._other_base_helper(base=3)
+ self._other_base_helper(base=36)
+
+
def test_main():
run_unittest(IntTestCases)
+ run_unittest(IntStrDigitLimitsTests)
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
index 6788cedd809decb..bb43f8215085437 100644
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -941,8 +941,37 @@ def test_bit_length(self):
self.assertEqual((-a-1).bit_length(), i+1)
+class LongStrDigitLimitsTests(test_int.IntStrDigitLimitsTests):
+ int_class = long
+
+ def test_l_not_counted(self):
+ int_class = self.int_class
+ maxdigits = sys.get_int_max_str_digits()
+
+ s = '5' * maxdigits
+ i = int_class(s)
+ long_i = int_class('{s}L'.format(s=s))
+ assert i == long_i
+ self.stringify(long_i)
+
+ def test_max_l_str_digits(self):
+ int_class = self.int_class
+ maxdigits = sys.get_int_max_str_digits()
+
+ int_class('1' * maxdigits + 'L')
+ int_class(u'1' * maxdigits + u'L')
+
+ self.check('1' * (maxdigits + 1) + 'L')
+ self.check(u'1' * (maxdigits + 1) + u'L')
+
+class LongSubclassStrDigitLimitsTests(LongStrDigitLimitsTests):
+ int_class = LongSubclass
+
+
def test_main():
test_support.run_unittest(LongTest)
+ test_support.run_unittest(LongStrDigitLimitsTests)
+ test_support.run_unittest(LongSubclassStrDigitLimitsTests)
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 9342716272a7167..d3d8d9ca2f86065 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -389,13 +389,19 @@ def test_attributes(self):
self.assertIsInstance(sys.executable, basestring)
self.assertEqual(len(sys.float_info), 11)
self.assertEqual(sys.float_info.radix, 2)
- self.assertEqual(len(sys.long_info), 2)
+ self.assertEqual(len(sys.long_info), 4)
self.assertTrue(sys.long_info.bits_per_digit % 5 == 0)
self.assertTrue(sys.long_info.sizeof_digit >= 1)
+ self.assertGreaterEqual(sys.long_info.default_max_str_digits, 500)
+ self.assertGreaterEqual(sys.long_info.str_digits_check_threshold, 100)
+ self.assertGreater(sys.long_info.default_max_str_digits,
+ sys.long_info.str_digits_check_threshold)
self.assertEqual(type(sys.long_info.bits_per_digit), int)
self.assertEqual(type(sys.long_info.sizeof_digit), int)
self.assertIsInstance(sys.hexversion, int)
self.assertIsInstance(sys.maxint, int)
+ self.assertIsInstance(sys.long_info.default_max_str_digits, long)
+ self.assertIsInstance(sys.long_info.str_digits_check_threshold, long)
if test.test_support.have_unicode:
self.assertIsInstance(sys.maxunicode, int)
self.assertIsInstance(sys.platform, basestring)
@@ -436,7 +442,8 @@ def test_sys_flags(self):
attrs = ("debug", "py3k_warning", "division_warning", "division_new",
"inspect", "interactive", "optimize", "dont_write_bytecode",
"no_site", "ignore_environment", "tabcheck", "verbose",
- "unicode", "bytes_warning", "hash_randomization")
+ "unicode", "bytes_warning", "hash_randomization",
+ "int_max_str_digits")
for attr in attrs:
self.assertTrue(hasattr(sys.flags, attr), attr)
self.assertEqual(type(getattr(sys.flags, attr)), int, attr)
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
index 90ccb30716ff883..3ca0e3623b10060 100644
--- a/Lib/test/test_xmlrpc.py
+++ b/Lib/test/test_xmlrpc.py
@@ -222,6 +222,14 @@ def test_loads_unsupported(self):
'')
self.assertRaises(ResponseError, xmlrpclib.loads, data)
+ def test_limit_int(self):
+ data = '<params><param><value><int>{i}</int></value></param></params>'
+ maxdigits = 5000
+ with test_support.adjust_int_max_str_digits(maxdigits):
+ i = '1' * (maxdigits + 1)
+ with self.assertRaises(ValueError):
+ xmlrpclib.loads(data.format(i=i))
+
class HelperTestCase(unittest.TestCase):
def test_escape(self):
diff --git a/Modules/main.c b/Modules/main.c
index a6edf822d039cd4..733bb39d6b01de0 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -115,6 +115,10 @@ PYTHONHASHSEED: if this variable is set to 'random', the effect is the same\n\
as specifying the -R option: a random value is used to seed the hashes of\n\
str, bytes and datetime objects. It can also be set to an integer\n\
in the range [0,4294967295] to get hash values with a predictable seed.\n\
+PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an integer value\n\
+ when converting from a string and when converting an integer back to a str.\n\
+ A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n\
+ 16, and 32 are never limited.\n\
";
@@ -482,6 +486,14 @@ Py_Main(int argc, char **argv)
free(buf);
}
+ /* The variable is only tested for existence here; _PyLongMaxStrDigits_Init
+ will check its value further. */
+ if (!Py_LongMaxStrDigitsFlag &&
+ (p = Py_GETENV("PYTHONINTMAXSTRDIGITS")) && *p != '\0')
+ Py_LongMaxStrDigitsFlag = 1;
+
+ _PyLongMaxStrDigits_Init();
+
if (command == NULL && module == NULL && _PyOS_optind < argc &&
strcmp(argv[_PyOS_optind], "-") != 0)
{
diff --git a/Objects/longobject.c b/Objects/longobject.c
index c05f67c36c72c81..eceb6290757f1e2 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1338,7 +1338,7 @@ long_to_decimal_string(PyObject *aa, int addL)
{
PyLongObject *scratch, *a;
PyObject *str;
- Py_ssize_t size, strlen, size_a, i, j;
+ Py_ssize_t size, digitlen, strlen, size_a, i, j;
digit *pout, *pin, rem, tenpow;
char *p;
int negative;
@@ -1401,13 +1401,23 @@ long_to_decimal_string(PyObject *aa, int addL)
pout[size++] = 0;
/* calculate exact length of output string, and allocate */
- strlen = (addL != 0) + negative +
- 1 + (size - 1) * _PyLong_DECIMAL_SHIFT;
+ digitlen = 1 + (size - 1) * _PyLong_DECIMAL_SHIFT;
tenpow = 10;
rem = pout[size-1];
while (rem >= tenpow) {
tenpow *= 10;
- strlen++;
+ digitlen++;
+ }
+ strlen = (addL != 0) + negative + digitlen;
+ if (digitlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = PyThreadState_GET()->interp;
+ int max_str_digits = interp->long_max_str_digits;
+ if ((max_str_digits > 0) && (digitlen > max_str_digits)) {
+ Py_DECREF(scratch);
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
+ max_str_digits, digitlen);
+ return NULL;
+ }
}
str = PyString_FromStringAndSize(NULL, strlen);
if (str == NULL) {
@@ -1761,6 +1771,7 @@ PyLong_FromString(char *str, char **pend, int base)
start = str;
if ((base & (base - 1)) == 0)
+ /* binary bases are not limited by long_max_str_digits */
z = long_from_binary_base(&str, base);
else {
/***
@@ -1885,6 +1896,17 @@ digit beyond the first.
while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base)
++scan;
+ /* Limit the size to avoid excessive computation attacks. */
+ if ((scan - str) > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = PyThreadState_GET()->interp;
+ int max_str_digits = interp->long_max_str_digits;
+ if ((max_str_digits > 0) && ((scan - str) > max_str_digits)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
+ max_str_digits, (scan - str));
+ return NULL;
+ }
+ }
+
/* Create a long object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
@@ -4367,6 +4389,8 @@ internal representation of integers. The attributes are read only.");
static PyStructSequence_Field long_info_fields[] = {
{"bits_per_digit", "size of a digit in bits"},
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
+ {"default_max_str_digits", "maximum string conversion digits limitation"}, /* filled from _PY_LONG_DEFAULT_MAX_STR_DIGITS */
+ {"str_digits_check_threshold", "minimum positive value for long_max_str_digits"}, /* filled from _PY_LONG_MAX_STR_DIGITS_THRESHOLD */
{NULL, NULL}
};
@@ -4374,7 +4398,7 @@ static PyStructSequence_Desc long_info_desc = {
"sys.long_info", /* name */
long_info__doc__, /* doc */
long_info_fields, /* fields */
- 2 /* number of fields */
+ 4 /* number of fields */
};
PyObject *
@@ -4389,6 +4413,10 @@ PyLong_GetInfo(void)
PyInt_FromLong(PyLong_SHIFT));
PyStructSequence_SET_ITEM(long_info, field++,
PyInt_FromLong(sizeof(digit)));
+ PyStructSequence_SET_ITEM(long_info, field++,
+ PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
+ PyStructSequence_SET_ITEM(long_info, field++,
+ PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
if (PyErr_Occurred()) {
Py_CLEAR(long_info);
return NULL;
@@ -4399,8 +4427,45 @@ PyLong_GetInfo(void)
int
_PyLong_Init(void)
{
+ PyInterpreterState *interp = PyThreadState_GET()->interp; /* interpreter of the calling thread */
/* initialize long_info */
if (Long_InfoType.tp_name == 0)
PyStructSequence_InitType(&Long_InfoType, &long_info_desc);
+ interp->long_max_str_digits = Py_LongMaxStrDigits; /* seed the limit from the process-wide setting */
+ if (interp->long_max_str_digits == -1) { /* -1: no limit was configured, fall back to the default */
+ interp->long_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
+ } /* NOTE(review): an interpreter left at -1 passes the (max_str_digits > 0) checks unlimited; confirm all run this */
return 1;
}
+
+/* Parse PYTHONINTMAXSTRDIGITS into Py_LongMaxStrDigits; an invalid value goes to Py_FatalError. */
+void
+_PyLongMaxStrDigits_Init(void)
+{
+ char *env;
+ const char *endptr;
+ long maxdigits;
+
+ /* Nothing to do when a limit is already stored or the variable was not flagged as set. */
+ if (Py_LongMaxStrDigits >= 0 ||
+ !Py_LongMaxStrDigitsFlag)
+ return;
+
+ env = Py_GETENV("PYTHONINTMAXSTRDIGITS");
+ if (env && *env != '\0') {
+ errno = 0; /* cleared so ERANGE below can only come from this strtol call */
+ maxdigits = strtol(env, (char **)&endptr, 10); /* endptr is left at the first unparsed character */
+ if (*endptr != '\0' || errno == ERANGE || maxdigits < INT_MIN || maxdigits > INT_MAX ||
+ !((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD))) { /* only 0 or >= threshold is valid */
+#define STRINGIFY(VAL) _STRINGIFY(VAL) /* two-step expansion: paste the macro's value, not its name */
+#define _STRINGIFY(VAL) #VAL
+ Py_FatalError(
+ "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= "
+ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)
+ " or 0 for unlimited.");
+#undef _STRINGIFY
+#undef STRINGIFY
+ }
+ Py_LongMaxStrDigits = (int)maxdigits; /* range-checked against INT_MIN/INT_MAX above */
+ }
+}
diff --git a/Python/pystate.c b/Python/pystate.c
index f33f18202360f57..3a4f0a9db32b2a5 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -84,6 +84,7 @@ PyInterpreterState_New(void)
#ifdef WITH_TSC
interp->tscdump = 0;
#endif
+ interp->long_max_str_digits = -1;
HEAD_LOCK();
interp->next = interp_head;
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index abdfb146ae51f16..22ca16fe47daea9 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -82,6 +82,8 @@ int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */
int _Py_QnewFlag = 0;
int Py_NoUserSiteDirectory = 0; /* for -s and site.py */
int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */
+int Py_LongMaxStrDigitsFlag = 0; /* for PYTHONINTMAXSTRDIGITS */
+int Py_LongMaxStrDigits = -1; /* for longobject.c */
/* Hack to force loading of object files */
@@ -197,8 +199,12 @@ Py_InitializeEx(int install_sigs)
check its value further. */
if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);
-
_PyRandom_Init();
+ /* The variable is only tested for existence here; _PyLongMaxStrDigits_Init
+ will check its value further. */
+ if ((p = Py_GETENV("PYTHONINTMAXSTRDIGITS")) && *p != '\0')
+ Py_LongMaxStrDigitsFlag = add_flag(Py_LongMaxStrDigitsFlag, p);
+ _PyLongMaxStrDigits_Init();
interp = PyInterpreterState_New();
if (interp == NULL)
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index fdb7af2f5f67648..fde6a8448eeec71 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -668,6 +668,46 @@ The flag constants are defined in the ctypes and DLFCN modules.");
#endif /* HAVE_DLOPEN */
+static PyObject *
+sys_set_int_max_str_digits(PyObject *self, PyObject *args)
+{ /* sys.set_int_max_str_digits(maxdigits): validate the value and store it on the current interpreter */
+ int new_val;
+ PyThreadState *tstate = PyThreadState_GET();
+ if (!PyArg_ParseTuple(args, "i:set_int_max_str_digits", &new_val)) /* exactly one C int argument */
+ return NULL;
+ if (!tstate)
+ return NULL; /* NOTE(review): NULL is returned here without an exception being set */
+ if ((!new_val) || new_val >= (_PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { /* accept 0 or anything >= the threshold */
+ tstate->interp->long_max_str_digits = new_val;
+ } else {
+ PyErr_Format(
+ PyExc_ValueError, "maxdigits must be 0 or larger than %d", /* NOTE(review): the threshold itself is accepted above */
+ _PY_LONG_MAX_STR_DIGITS_THRESHOLD);
+ return NULL;
+ }
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(set_int_max_str_digits_doc,
+"set_int_max_str_digits(maxdigits) -> None\n\
+\n\
+Set the maximum string digits limit for non-binary int<->str conversions.");
+
+static PyObject *
+sys_get_int_max_str_digits(PyObject *self, PyObject *args)
+{ /* sys.get_int_max_str_digits(): report the limit stored on the current interpreter */
+ PyThreadState *tstate = PyThreadState_GET();
+ if (!tstate)
+ return NULL; /* NOTE(review): NULL is returned here without an exception being set */
+ return PyInt_FromLong(tstate->interp->long_max_str_digits);
+}
+
+PyDoc_STRVAR(get_int_max_str_digits_doc,
+"get_int_max_str_digits() -> int\n\
+\n\
+Return the maximum string digits limit for non-binary int<->str conversions.");
+
#ifdef USE_MALLOPT
/* Link with -lmalloc (or -lmpc) on an SGI */
#include <malloc.h>
@@ -937,6 +977,8 @@ static PyMethodDef sys_methods[] = {
{"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS,
getdlopenflags_doc},
#endif
+ {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits,
+ METH_NOARGS, get_int_max_str_digits_doc},
#ifdef COUNT_ALLOCS
{"getcounts", (PyCFunction)sys_getcounts, METH_NOARGS},
#endif
@@ -978,6 +1020,8 @@ static PyMethodDef sys_methods[] = {
{"setdlopenflags", sys_setdlopenflags, METH_VARARGS,
setdlopenflags_doc},
#endif
+ {"set_int_max_str_digits", (PyCFunction)sys_set_int_max_str_digits,
+ METH_VARARGS, set_int_max_str_digits_doc},
{"setprofile", sys_setprofile, METH_O, setprofile_doc},
{"getprofile", sys_getprofile, METH_NOARGS, getprofile_doc},
{"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS,
@@ -1139,6 +1183,7 @@ exc_info() -- return thread-safe information about the current exception\n\
exc_clear() -- clear the exception state for the current thread\n\
exit() -- exit the interpreter by raising SystemExit\n\
getdlopenflags() -- returns flags to be used for dlopen() calls\n\
+get_int_max_str_digits() -- returns the length limit for non-binary int<->str conversions\n\
getprofile() -- get the global profiling function\n\
getrefcount() -- return the reference count for an object (plus one :-)\n\
getrecursionlimit() -- return the max recursion depth for the interpreter\n\
@@ -1146,6 +1191,7 @@ getsizeof() -- return the size of an object in bytes\n\
gettrace() -- get the global debug tracing function\n\
setcheckinterval() -- control how often the interpreter checks for events\n\
setdlopenflags() -- set the flags to be used for dlopen() calls\n\
+set_int_max_str_digits() -- set the length limit for non-binary int<->str conversions\n\
setprofile() -- set the global profiling function\n\
setrecursionlimit() -- set the max recursion depth for the interpreter\n\
settrace() -- set the global debug tracing function\n\
@@ -1227,6 +1273,7 @@ static PyStructSequence_Field flags_fields[] = {
/* {"skip_first", "-x"}, */
{"bytes_warning", "-b"},
{"hash_randomization", "-R"},
+    {"int_max_str_digits", "PYTHONINTMAXSTRDIGITS"},
{0}
};
@@ -1235,9 +1282,9 @@ static PyStructSequence_Desc flags_desc = {
flags__doc__, /* doc */
flags_fields, /* fields */
#ifdef RISCOS
- 17
+ 18
#else
- 16
+ 17
#endif
};
@@ -1275,6 +1322,7 @@ make_flags(void)
/* SetFlag(skipfirstline); */
SetFlag(Py_BytesWarningFlag);
SetFlag(Py_HashRandomizationFlag);
+ SetFlag(Py_LongMaxStrDigits);
#undef SetFlag
if (PyErr_Occurred()) {