"""Extracting and changing portions of the current line All functions take cursor offset from the beginning of the line and the line of Python code, and return None, or a tuple of the start index, end index, and the word.""" import re from dataclasses import dataclass from itertools import chain from typing import Optional, Tuple from .lazyre import LazyReCompile @dataclass class LinePart: start: int stop: int word: str _current_word_re = LazyReCompile(r"(?<![)\]\w_.])" r"([\w_][\w0-9._]*[(]?)") CHARACTER_PAIR_MAP = {"(": ")", "{": "}", "[": "]", "'": "'", '"': '"'} def current_word(cursor_offset: int, line: str) -> Optional[LinePart]: """the object.attribute.attribute just before or under the cursor""" start = cursor_offset end = cursor_offset word = None for m in _current_word_re.finditer(line): if m.start(1) < cursor_offset <= m.end(1): start = m.start(1) end = m.end(1) word = m.group(1) if word is None: return None return LinePart(start, end, word) # pieces of regex to match repr() of several hashable built-in types _match_all_dict_keys = r"""[^\]]*""" # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals _match_single_quote_str_bytes = r""" # bytes repr() begins with `b` character; bytes and str begin with `'` b?' # match escape sequence; this handles `\'` in the string repr() (?:\\['"nabfrtvxuU\\]| # or match any non-`\` and non-single-quote character (most of the string) [^'\\])* # matches hanging `\` or ending `'` if one is present [\\']? """ # bytes and str repr() only uses double quotes if the string contains 1 or more # `'` character and exactly 0 `"` characters _match_double_quote_str_bytes = r""" # bytes repr() begins with `b` character b?" # string continues until a `"` character is reached [^"]* # end matching at closing double-quote if one is present "?""" # match valid identifier name followed by `[` character _match_dict_before_key = r"""[\w_][\w0-9._]*\[""" _current_dict_key_re = LazyReCompile( f"{_match_dict_before_key}((?:" f"{_match_single_quote_str_bytes}|" f"{_match_double_quote_str_bytes}|" f"{_match_all_dict_keys}|)*)", re.VERBOSE, ) def current_dict_key(cursor_offset: int, line: str) -> Optional[LinePart]: """If in dictionary completion, return the current key""" for m in _current_dict_key_re.finditer(line): if m.start(1) <= cursor_offset <= m.end(1): return LinePart(m.start(1), m.end(1), m.group(1)) return None # capture valid identifier name if followed by `[` character _capture_dict_name = r"""([\w_][\w0-9._]*)\[""" _current_dict_re = LazyReCompile( f"{_capture_dict_name}((?:" f"{_match_single_quote_str_bytes}|" f"{_match_double_quote_str_bytes}|" f"{_match_all_dict_keys}|)*)", re.VERBOSE, ) def current_dict(cursor_offset: int, line: str) -> Optional[LinePart]: """If in dictionary completion, return the dict that should be used""" for m in _current_dict_re.finditer(line): if m.start(2) <= cursor_offset <= m.end(2): return LinePart(m.start(1), m.end(1), m.group(1)) return None _current_string_re = LazyReCompile( '''(?P(?:""")|"|(?:''\')|')(?:((?P.+?)(?P=open))|''' """(?P.+))""" ) def current_string(cursor_offset: int, line: str) -> Optional[LinePart]: """If inside a string of nonzero length, return the string (excluding quotes) Weaker than bpython.Repl's current_string, because that checks that a string is a string based on previous lines in the buffer.""" for m in _current_string_re.finditer(line): i = 3 if m.group(3) else 4 if m.start(i) <= cursor_offset <= m.end(i): return LinePart(m.start(i), m.end(i), m.group(i)) return None _current_object_re = LazyReCompile(r"([\w_][\w0-9_]*)[.]") def current_object(cursor_offset: int, line: str) -> Optional[LinePart]: """If in attribute completion, the object on which attribute should be looked up.""" match = current_word(cursor_offset, line) if match is None: return None s = ".".join( m.group(1) for m in _current_object_re.finditer(match.word) if m.end(1) + match.start < cursor_offset ) if not s: return None return LinePart(match.start, match.start + len(s), s) _current_object_attribute_re = LazyReCompile(r"([\w_][\w0-9_]*)[.]?") def current_object_attribute( cursor_offset: int, line: str ) -> Optional[LinePart]: """If in attribute completion, the attribute being completed""" # TODO replace with more general current_expression_attribute match = current_word(cursor_offset, line) if match is None: return None matches = _current_object_attribute_re.finditer(match.word) next(matches) for m in matches: if m.start(1) + match.start <= cursor_offset <= m.end(1) + match.start: return LinePart( m.start(1) + match.start, m.end(1) + match.start, m.group(1) ) return None _current_from_import_from_re = LazyReCompile( r"from +([\w0-9_.]*)(?:\s+import\s+([\w0-9_]+[,]?\s*)+)*" ) def current_from_import_from( cursor_offset: int, line: str ) -> Optional[LinePart]: """If in from import completion, the word after from returns None if cursor not in or just after one of the two interesting parts of an import: from (module) import (name1, name2) """ # TODO allow for as's for m in _current_from_import_from_re.finditer(line): if (m.start(1) < cursor_offset <= m.end(1)) or ( m.start(2) < cursor_offset <= m.end(2) ): return LinePart(m.start(1), m.end(1), m.group(1)) return None _current_from_import_import_re_1 = LazyReCompile( r"from\s+([\w0-9_.]*)\s+import" ) _current_from_import_import_re_2 = LazyReCompile(r"([\w0-9_]+)") _current_from_import_import_re_3 = LazyReCompile(r", *([\w0-9_]*)") def current_from_import_import( cursor_offset: int, line: str ) -> Optional[LinePart]: """If in from import completion, the word after import being completed returns None if cursor not in or just after one of these words """ baseline = _current_from_import_import_re_1.search(line) if baseline is None: return None match1 = _current_from_import_import_re_2.search(line[baseline.end() :]) if match1 is None: return None for m in chain( (match1,), _current_from_import_import_re_3.finditer(line[baseline.end() :]), ): start = baseline.end() + m.start(1) end = baseline.end() + m.end(1) if start < cursor_offset <= end: return LinePart(start, end, m.group(1)) return None _current_import_re_1 = LazyReCompile(r"import") _current_import_re_2 = LazyReCompile(r"([\w0-9_.]+)") _current_import_re_3 = LazyReCompile(r"[,][ ]*([\w0-9_.]*)") def current_import(cursor_offset: int, line: str) -> Optional[LinePart]: # TODO allow for multiple as's baseline = _current_import_re_1.search(line) if baseline is None: return None match1 = _current_import_re_2.search(line[baseline.end() :]) if match1 is None: return None for m in chain( (match1,), _current_import_re_3.finditer(line[baseline.end() :]) ): start = baseline.end() + m.start(1) end = baseline.end() + m.end(1) if start < cursor_offset <= end: return LinePart(start, end, m.group(1)) return None _current_method_definition_name_re = LazyReCompile(r"def\s+([a-zA-Z_][\w]*)") def current_method_definition_name( cursor_offset: int, line: str ) -> Optional[LinePart]: """The name of a method being defined""" for m in _current_method_definition_name_re.finditer(line): if m.start(1) <= cursor_offset <= m.end(1): return LinePart(m.start(1), m.end(1), m.group(1)) return None _current_single_word_re = LazyReCompile(r"(?<![.])\b([a-zA-Z_][\w]*)") def current_single_word(cursor_offset: int, line: str) -> Optional[LinePart]: """the un-dotted word just before or under the cursor""" for m in _current_single_word_re.finditer(line): if m.start(1) <= cursor_offset <= m.end(1): return LinePart(m.start(1), m.end(1), m.group(1)) return None def current_dotted_attribute( cursor_offset: int, line: str ) -> Optional[LinePart]: """The dotted attribute-object pair before the cursor""" match = current_word(cursor_offset, line) if match is not None and "." in match.word[1:]: return match return None _current_expression_attribute_re = LazyReCompile( r"[.]\s*((?:[\w_][\w0-9_]*)|(?:))" ) def current_expression_attribute( cursor_offset: int, line: str ) -> Optional[LinePart]: """If after a dot, the attribute being completed""" # TODO replace with more general current_expression_attribute for m in _current_expression_attribute_re.finditer(line): if m.start(1) <= cursor_offset <= m.end(1): return LinePart(m.start(1), m.end(1), m.group(1)) return None def cursor_on_closing_char_pair( cursor_offset: int, line: str, ch: Optional[str] = None ) -> Tuple[bool, bool]: """Checks if cursor sits on closing character of a pair and whether its pair character is directly behind it """ on_closing_char, pair_close = False, False if line is None: return on_closing_char, pair_close if cursor_offset < len(line): cur_char = line[cursor_offset] if cur_char in CHARACTER_PAIR_MAP.values(): on_closing_char = True if ch is None else cur_char == ch if cursor_offset > 0: prev_char = line[cursor_offset - 1] if ( on_closing_char and prev_char in CHARACTER_PAIR_MAP and CHARACTER_PAIR_MAP[prev_char] == cur_char ): pair_close = True if ch is None else prev_char == ch return on_closing_char, pair_close