Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 22 additions & 64 deletions crates/compiler-core/src/bytecode/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,23 +614,15 @@ impl Instruction {
/// Map a specialized opcode back to its adaptive (base) variant.
/// `_PyOpcode_Deopt`
pub const fn deopt(self) -> Option<Self> {
Some(match self {
// RESUME specializations
Self::ResumeCheck => Self::Resume {
context: Arg::marker(),
},
// LOAD_CONST specializations
Self::LoadConstMortal | Self::LoadConstImmortal => Self::LoadConst {
consti: Arg::marker(),
},
// TO_BOOL specializations
let opcode = match self {
Self::ResumeCheck => Opcode::Resume,
Self::LoadConstMortal | Self::LoadConstImmortal => Opcode::LoadConst,
Self::ToBoolAlwaysTrue
| Self::ToBoolBool
| Self::ToBoolInt
| Self::ToBoolList
| Self::ToBoolNone
| Self::ToBoolStr => Self::ToBool,
// BINARY_OP specializations
| Self::ToBoolStr => Opcode::ToBool,
Self::BinaryOpMultiplyInt
| Self::BinaryOpAddInt
| Self::BinaryOpSubtractInt
Expand All @@ -645,34 +637,18 @@ impl Instruction {
| Self::BinaryOpSubscrDict
| Self::BinaryOpSubscrGetitem
| Self::BinaryOpExtend
| Self::BinaryOpInplaceAddUnicode => Self::BinaryOp { op: Arg::marker() },
// STORE_SUBSCR specializations
Self::StoreSubscrDict | Self::StoreSubscrListInt => Self::StoreSubscr,
// SEND specializations
Self::SendGen => Self::Send {
delta: Arg::marker(),
},
// UNPACK_SEQUENCE specializations
| Self::BinaryOpInplaceAddUnicode => Opcode::BinaryOp,
Self::StoreSubscrDict | Self::StoreSubscrListInt => Opcode::StoreSubscr,
Self::SendGen => Opcode::Send,
Self::UnpackSequenceTwoTuple | Self::UnpackSequenceTuple | Self::UnpackSequenceList => {
Self::UnpackSequence {
count: Arg::marker(),
}
Opcode::UnpackSequence
}
// STORE_ATTR specializations

Self::StoreAttrInstanceValue | Self::StoreAttrSlot | Self::StoreAttrWithHint => {
Self::StoreAttr {
namei: Arg::marker(),
}
Opcode::StoreAttr
}
// LOAD_GLOBAL specializations
Self::LoadGlobalModule | Self::LoadGlobalBuiltin => Self::LoadGlobal {
namei: Arg::marker(),
},
// LOAD_SUPER_ATTR specializations
Self::LoadSuperAttrAttr | Self::LoadSuperAttrMethod => Self::LoadSuperAttr {
namei: Arg::marker(),
},
// LOAD_ATTR specializations
Self::LoadGlobalModule | Self::LoadGlobalBuiltin => Opcode::LoadGlobal,
Self::LoadSuperAttrAttr | Self::LoadSuperAttrMethod => Opcode::LoadSuperAttr,
Self::LoadAttrInstanceValue
| Self::LoadAttrModule
| Self::LoadAttrWithHint
Expand All @@ -685,28 +661,13 @@ impl Instruction {
| Self::LoadAttrMethodNoDict
| Self::LoadAttrMethodLazyDict
| Self::LoadAttrNondescriptorWithValues
| Self::LoadAttrNondescriptorNoDict => Self::LoadAttr {
namei: Arg::marker(),
},
// COMPARE_OP specializations
Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => Self::CompareOp {
opname: Arg::marker(),
},
// CONTAINS_OP specializations
Self::ContainsOpSet | Self::ContainsOpDict => Self::ContainsOp {
invert: Arg::marker(),
},
// JUMP_BACKWARD specializations
Self::JumpBackwardNoJit | Self::JumpBackwardJit => Self::JumpBackward {
delta: Arg::marker(),
},
// FOR_ITER specializations
| Self::LoadAttrNondescriptorNoDict => Opcode::LoadAttr,
Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => Opcode::CompareOp,
Self::ContainsOpSet | Self::ContainsOpDict => Opcode::ContainsOp,
Self::JumpBackwardNoJit | Self::JumpBackwardJit => Opcode::JumpBackward,
Self::ForIterList | Self::ForIterTuple | Self::ForIterRange | Self::ForIterGen => {
Self::ForIter {
delta: Arg::marker(),
}
Opcode::ForIter
}
// CALL specializations
Self::CallBoundMethodExactArgs
| Self::CallPyExactArgs
| Self::CallType1
Expand All @@ -726,15 +687,12 @@ impl Instruction {
| Self::CallAllocAndEnterInit
| Self::CallPyGeneral
| Self::CallBoundMethodGeneral
| Self::CallNonPyGeneral => Self::Call {
argc: Arg::marker(),
},
// CALL_KW specializations
Self::CallKwBoundMethod | Self::CallKwPy | Self::CallKwNonPy => Self::CallKw {
argc: Arg::marker(),
},
| Self::CallNonPyGeneral => Opcode::Call,
Self::CallKwBoundMethod | Self::CallKwPy | Self::CallKwNonPy => Opcode::CallKw,
_ => return None,
})
};

Some(opcode.as_instruction())
}

/// Map a specialized or instrumented opcode back to its adaptive (base) variant.
Expand Down
70 changes: 33 additions & 37 deletions scripts/generate_opcode_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,48 +85,44 @@ def extract_enum_body(text: str, name: str) -> str:
return text[start + 1 : i]


def build_deopts(contents: str) -> dict[str, list[str]]:
raw_body = re.search(
r"fn deopt\(self\) -> Option<Self>(.*)", contents, re.DOTALL
).group(1)
body = "\n".join(
itertools.takewhile(
lambda l: not l.startswith("_ =>"), # Take until reaching fallback
filter(
lambda l: (
not l.startswith(
("//", "Some(match")
) # Skip comments or start of match
),
map(str.strip, raw_body.splitlines()),
),
)
).removeprefix("{")
def build_deopts(text: str) -> dict[str, list[str]]:
raw_body = re.search(r"fn deopt\(self\)(.*)", text, re.DOTALL).group(1)
match_start = raw_body.find("match self")
if match_start == -1:
raise ValueError("Could not detect a match statement in deopt method")

depth = 0
arms = []
buf = []
for char in body:
if char == "{":
depth += 1
elif char == "}":
depth -= 1
brace_depth = 0
block_start = None
block_end = None

if depth == 0 and (char in ("}", ",")):
arm = "".join(buf).strip()
arms.append(arm)
buf = []
else:
buf.append(char)
for i, ch in enumerate(raw_body[match_start:], match_start):
if ch == "{":
brace_depth += 1
if block_start is None:
block_start = i + 1
elif ch == "}":
brace_depth -= 1
if brace_depth == 0:
block_end = i
break

match_body = raw_body[block_start:block_end]
Comment on lines +94 to +109
Copy link
Copy Markdown
Contributor

@coderabbitai coderabbitai Bot Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fail fast if the brace scan never closes the match block.

raw_body[block_start:block_end] accepts None, so a parse miss here degrades into slicing to EOF instead of surfacing an error. That makes it possible to emit incomplete _specializations data silently if a valid future edit in deopt() throws off the brace counter.

Suggested guard
     for i, ch in enumerate(raw_body[match_start:], match_start):
         if ch == "{":
             brace_depth += 1
             if block_start is None:
                 block_start = i + 1
         elif ch == "}":
             brace_depth -= 1
             if brace_depth == 0:
                 block_end = i
                 break
 
+    if block_start is None or block_end is None:
+        raise ValueError("Could not isolate deopt match block")
+
     match_body = raw_body[block_start:block_end]
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
brace_depth = 0
block_start = None
block_end = None
if depth == 0 and (char in ("}", ",")):
arm = "".join(buf).strip()
arms.append(arm)
buf = []
else:
buf.append(char)
for i, ch in enumerate(raw_body[match_start:], match_start):
if ch == "{":
brace_depth += 1
if block_start is None:
block_start = i + 1
elif ch == "}":
brace_depth -= 1
if brace_depth == 0:
block_end = i
break
match_body = raw_body[block_start:block_end]
brace_depth = 0
block_start = None
block_end = None
for i, ch in enumerate(raw_body[match_start:], match_start):
if ch == "{":
brace_depth += 1
if block_start is None:
block_start = i + 1
elif ch == "}":
brace_depth -= 1
if brace_depth == 0:
block_end = i
break
if block_start is None or block_end is None:
raise ValueError("Could not isolate deopt match block")
match_body = raw_body[block_start:block_end]
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/generate_opcode_metadata.py` around lines 94 - 109, The current
brace-scan can leave block_start or block_end as None and then silently slice
raw_body[block_start:block_end]; update the scan logic (using variables
brace_depth, block_start, block_end, match_start) to validate after the loop
that both block_start and block_end were found and raise/throw a clear parsing
error (including context like match_start or a snippet of raw_body) if either is
None so the script fails fast instead of producing incomplete
match_body/_specializations.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine, we don't care about the closing the match block, we care only about the match arms

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShaharNaveh, that's a fair point. Since the arm-extraction regex (Self::\w+ => Opcode::\w+) is specific enough that it won't produce false positives from unrelated code even if match_body happens to extend to EOF, the tight bounding of the block isn't strictly required for correctness. I'll withdraw the suggestion.


🧠 Learnings used
Learnt from: ShaharNaveh
Repo: RustPython/RustPython PR: 7573
File: scripts/generate_opcode_metadata.py:69-85
Timestamp: 2026-04-15T08:58:35.342Z
Learning: In `scripts/generate_opcode_metadata.py` (RustPython repo), `extract_enum_body` intentionally returns `None` when the enum is not found or the closing brace is missing. The author wants the caller to crash with a `TypeError` in that case, rather than raising an explicit exception. Do not suggest converting `return None` to `raise ValueError(...)` in this function.


# last arm
arms.append("".join(buf))
arms = [arm for arm in arms if arm]
arm_pattern = re.compile(
r"((?:Self::\w+\s*\|\s*)*Self::\w+)\s*=>\s*(?:\{\s*)?Opcode::(\w+)", re.DOTALL
)
variants_pattern = re.compile(r"Self::(\w+)")

deopts = {}
for arm in arms:
*specialized, deopt = map(to_snake_case, re.findall(r"Self::(\w*)\b", arm))
deopts[deopt] = specialized
for hit in arm_pattern.finditer(match_body):
raw_variants = hit.group(1)
opcode = hit.group(2)

variants = variants_pattern.findall(raw_variants)

key = to_snake_case(opcode)
value = [to_snake_case(variant) for variant in variants]
deopts[key] = value

return deopts

Expand Down
Loading