Skip to content

Commit be43bb6

Browse files
Match CPython error type for non-ASCII struct format arguments (RustPython#7681)
* Match CPython error type for non-ASCII struct format arguments

  Struct() raised the wrong exception type when the format argument contained non-ASCII characters:

  - str input with non-ASCII char: RustPython raised UnicodeDecodeError with an empty message; CPython raises UnicodeEncodeError as if format.encode('ascii') had been called directly.
  - bytes input with non-ASCII byte: same wrong UnicodeDecodeError; CPython passes the bytes through to the format parser, which then errors with struct.error("bad char in struct format").

  Restructure IntoStructFormatBytes::try_from_object to:

  - raise UnicodeEncodeError("ascii", s, start, start+1, "ordinal not in range(128)") for non-ASCII str, with start computed as the first non-ASCII code point position (matching CPython's natural encoding-error format);
  - raise struct.error("bad char in struct format") for non-ASCII bytes, produced via the existing new_struct_error helper.

  Probed byte-identical with CPython 3.14.4 for both cases. Full test.test_struct (43 tests) passes with no regressions. Sanity-tested all standard format/pack/unpack/calcsize call shapes remain unchanged.

* Add regression test for non-ASCII format string error types

* Use raise AssertionError instead of assert False (B011)
1 parent 6ab1f80 commit be43bb6

2 files changed

Lines changed: 46 additions & 8 deletions

File tree

crates/stdlib/src/pystruct.rs

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,42 @@ pub(crate) mod _struct {
2626

2727
impl TryFromObject for IntoStructFormatBytes {
2828
fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
29-
// CPython turns str to bytes but we do reversed way here
30-
// The only performance difference is this transition cost
29+
// CPython turns str to bytes (via str.encode('ascii')) but we keep str.
30+
// The error reporting for non-ASCII input still matches CPython:
31+
// - str input with non-ASCII char: UnicodeEncodeError, the same exception
32+
// str.encode('ascii') would produce.
33+
// - bytes input with non-ASCII byte: struct.error("bad char in struct format"),
34+
// matching CPython where bytes are passed through to the format parser.
3135
let fmt = match_class!(match obj {
32-
s @ PyStr => s.isascii().then_some(s),
33-
b @ PyBytes => ascii::AsciiStr::from_ascii(&b)
34-
.ok()
35-
.map(|s| vm.ctx.new_str(s)),
36+
s @ PyStr => {
37+
if !s.isascii() {
38+
let start = s
39+
.as_wtf8()
40+
.code_points()
41+
.position(|cp| !cp.to_char().is_some_and(|c| c.is_ascii()))
42+
.unwrap_or(0);
43+
return Err(vm.new_unicode_encode_error_real(
44+
vm.ctx.new_str("ascii"),
45+
s,
46+
start,
47+
start + 1,
48+
vm.ctx.new_str("ordinal not in range(128)"),
49+
));
50+
}
51+
s
52+
}
53+
b @ PyBytes => {
54+
let ascii_str = ascii::AsciiStr::from_ascii(&b).map_err(|_| {
55+
new_struct_error(vm, "bad char in struct format".to_owned())
56+
})?;
57+
vm.ctx.new_str(ascii_str)
58+
}
3659
other =>
3760
return Err(vm.new_type_error(format!(
3861
"Struct() argument 1 must be a str or bytes object, not {}",
3962
other.class().name()
4063
))),
41-
})
42-
.ok_or_else(|| vm.new_unicode_decode_error("Struct format must be a ascii string"))?;
64+
});
4365
Ok(Self(fmt))
4466
}
4567
}

extra_tests/snippets/stdlib_struct.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,19 @@ def __index__(self):
7676

7777
assert struct.error.__module__ == "struct"
7878
assert struct.error.__name__ == "error"
79+
80+
# Non-ASCII format string: error type matches CPython.
81+
# str → UnicodeEncodeError (encoding='ascii')
82+
# bytes → struct.error
83+
try:
84+
struct.Struct("\udc00")
85+
except UnicodeEncodeError as e:
86+
assert e.encoding == "ascii"
87+
else:
88+
raise AssertionError("expected UnicodeEncodeError")
89+
90+
with assert_raises(UnicodeEncodeError):
91+
struct.Struct("한")
92+
93+
with assert_raises(struct.error):
94+
struct.Struct(b"\xff")

0 commit comments

Comments
 (0)