Skip to content

Commit 58c4b2c

Browse files
author
jvr
committed
patch #683515: "Add unicode support to compile(), eval() and exec"
Incorporated nnorwitz's comment re. Py_USING_UNICODE. git-svn-id: http://svn.python.org/projects/python/trunk@31310 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 58094e1 commit 58c4b2c

5 files changed

Lines changed: 61 additions & 9 deletions

File tree

Include/pythonrun.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ extern "C" {
99

1010
#define PyCF_MASK (CO_FUTURE_DIVISION)
1111
#define PyCF_MASK_OBSOLETE (CO_GENERATOR_ALLOWED | CO_NESTED)
12+
#define PyCF_SOURCE_IS_UTF8 0x0100
1213

1314
typedef struct {
1415
int cf_flags; /* bitmask of CO_xxx flags relevant to future */

Lib/test/test_builtin.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ def test_compile(self):
195195
self.assertRaises(TypeError, compile)
196196
self.assertRaises(ValueError, compile, 'print 42\n', '<string>', 'badmode')
197197
self.assertRaises(ValueError, compile, 'print 42\n', '<string>', 'single', 0xff)
198+
if have_unicode:
199+
compile(unicode('print u"\xc3\xa5"\n', 'utf8'), '', 'exec')
198200

199201
def test_complex(self):
200202
class OS:
@@ -309,6 +311,8 @@ def test_eval(self):
309311
self.assertEqual(eval(unicode('c'), globals, locals), 300)
310312
bom = '\xef\xbb\xbf'
311313
self.assertEqual(eval(bom + 'a', globals, locals), 1)
314+
self.assertEqual(eval(unicode('u"\xc3\xa5"', 'utf8'), globals),
315+
unicode('\xc3\xa5', 'utf8'))
312316
self.assertRaises(TypeError, eval)
313317
self.assertRaises(TypeError, eval, ())
314318

Python/bltinmodule.c

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -340,11 +340,32 @@ builtin_compile(PyObject *self, PyObject *args)
340340
int dont_inherit = 0;
341341
int supplied_flags = 0;
342342
PyCompilerFlags cf;
343+
PyObject *result, *cmd, *tmp = NULL;
343344

344-
if (!PyArg_ParseTuple(args, "sss|ii:compile", &str, &filename,
345+
if (!PyArg_ParseTuple(args, "Oss|ii:compile", &cmd, &filename,
345346
&startstr, &supplied_flags, &dont_inherit))
346347
return NULL;
347348

349+
cf.cf_flags = supplied_flags;
350+
351+
#ifdef Py_USING_UNICODE
352+
if (PyUnicode_Check(cmd)) {
353+
tmp = PyUnicode_AsUTF8String(cmd);
354+
if (tmp == NULL)
355+
return NULL;
356+
cmd = tmp;
357+
cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
358+
}
359+
#endif
360+
if (!PyString_Check(cmd)) {
361+
PyErr_SetString(PyExc_TypeError,
362+
"compile() arg 1 must be a string");
363+
return NULL;
364+
}
365+
366+
if (PyString_AsStringAndSize(cmd, &str, NULL))
367+
return NULL;
368+
348369
if (strcmp(startstr, "exec") == 0)
349370
start = Py_file_input;
350371
else if (strcmp(startstr, "eval") == 0)
@@ -364,11 +385,12 @@ builtin_compile(PyObject *self, PyObject *args)
364385
}
365386
/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */
366387

367-
cf.cf_flags = supplied_flags;
368388
if (!dont_inherit) {
369389
PyEval_MergeCompilerFlags(&cf);
370390
}
371-
return Py_CompileStringFlags(str, filename, start, &cf);
391+
result = Py_CompileStringFlags(str, filename, start, &cf);
392+
Py_XDECREF(tmp);
393+
return result;
372394
}
373395

374396
PyDoc_STRVAR(compile_doc,
@@ -428,7 +450,7 @@ Return the tuple ((x-x%y)/y, x%y). Invariant: div*y + mod == x.");
428450
static PyObject *
429451
builtin_eval(PyObject *self, PyObject *args)
430452
{
431-
PyObject *cmd;
453+
PyObject *cmd, *result, *tmp = NULL;
432454
PyObject *globals = Py_None, *locals = Py_None;
433455
char *str;
434456
PyCompilerFlags cf;
@@ -467,14 +489,26 @@ builtin_eval(PyObject *self, PyObject *args)
467489
"eval() arg 1 must be a string or code object");
468490
return NULL;
469491
}
492+
cf.cf_flags = 0;
493+
494+
#ifdef Py_USING_UNICODE
495+
if (PyUnicode_Check(cmd)) {
496+
tmp = PyUnicode_AsUTF8String(cmd);
497+
if (tmp == NULL)
498+
return NULL;
499+
cmd = tmp;
500+
cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
501+
}
502+
#endif
470503
if (PyString_AsStringAndSize(cmd, &str, NULL))
471504
return NULL;
472505
while (*str == ' ' || *str == '\t')
473506
str++;
474507

475-
cf.cf_flags = 0;
476508
(void)PyEval_MergeCompilerFlags(&cf);
477-
return PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
509+
result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
510+
Py_XDECREF(tmp);
511+
return result;
478512
}
479513

480514
PyDoc_STRVAR(eval_doc,

Python/ceval.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3122,7 +3122,7 @@ int
31223122
PyEval_MergeCompilerFlags(PyCompilerFlags *cf)
31233123
{
31243124
PyFrameObject *current_frame = (PyFrameObject *)PyEval_GetFrame();
3125-
int result = 0;
3125+
int result = cf->cf_flags != 0;
31263126

31273127
if (current_frame != NULL) {
31283128
const int codeflags = current_frame->f_code->co_flags;
@@ -3898,16 +3898,27 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals,
38983898
locals);
38993899
}
39003900
else {
3901+
PyObject *tmp = NULL;
39013902
char *str;
39023903
PyCompilerFlags cf;
3904+
cf.cf_flags = 0;
3905+
#ifdef Py_USING_UNICODE
3906+
if (PyUnicode_Check(prog)) {
3907+
tmp = PyUnicode_AsUTF8String(prog);
3908+
if (tmp == NULL)
3909+
return -1;
3910+
prog = tmp;
3911+
cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
3912+
}
3913+
#endif
39033914
if (PyString_AsStringAndSize(prog, &str, NULL))
39043915
return -1;
3905-
cf.cf_flags = 0;
39063916
if (PyEval_MergeCompilerFlags(&cf))
39073917
v = PyRun_StringFlags(str, Py_file_input, globals,
39083918
locals, &cf);
39093919
else
39103920
v = PyRun_String(str, Py_file_input, globals, locals);
3921+
Py_XDECREF(tmp);
39113922
}
39123923
if (plain)
39133924
PyFrame_LocalsToFast(f, 0);

Python/compile.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4206,7 +4206,9 @@ jcompile(node *n, const char *filename, struct compiling *base,
42064206
PyCodeObject *co;
42074207
if (!com_init(&sc, filename))
42084208
return NULL;
4209-
if (TYPE(n) == encoding_decl) {
4209+
if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
4210+
sc.c_encoding = "utf-8";
4211+
} else if (TYPE(n) == encoding_decl) {
42104212
sc.c_encoding = STR(n);
42114213
n = CHILD(n, 0);
42124214
} else {

0 commit comments

Comments
 (0)