From 2d6f20fca4882eb30f402cc257893641fb4697a3 Mon Sep 17 00:00:00 2001 From: Roland Walker Date: Wed, 18 Feb 2026 04:24:46 -0500 Subject: [PATCH] improve completion suggestions inside backticks Previously the behavior was a little janky: on the first character after a backtick, only identifiers which might _require_ a backtick were offered as suggestions (because, for instance, they matched reserved words). Sometimes, that list would be empty, and no suggestions were offered. Then, after typing a few more characters, rapidfuzz matching kicked in, and more suggestions were offered, with backticks off for the additional rapidfuzz suggestions. Now the behavior is more consistent: if a backtick is typed, _all_ suggestions which could work in that place are offered, with uniform backticks on the suggestions, even if backticks are not required for the given identifier. Of course, how early the suggestions are offered is still dependent on the min_completion_trigger option in ~/.myclirc, so the above paragraph is conditional. Changes * tuck optimization check inside _find_doubled_backticks(), and make the optimization test for double instead of single backticks * remove function unescape_name(), which is unused, and seems wrongly named since it is oriented towards strings rather than identifiers * let find_matches() add backticks to all suggestions if backtick quoting is detected at the cursor * recast a variable name in _find_doubled_backticks() Per some comments in the tests, it would be nicer if column names sorted more strongly to the top in the SELECT context, but that is not new to these changes. Another idea could be sorting to the top only those suggestions which require a backtick, when in the backtick context -- something for the future. We also seem to be needlessly suggesting some generic keywords in the SELECT context, but that is also not new to these changes. 
And if they are going to appear, it seems to make more sense to have them in backticks like the other suggestions, for the sake of uniformity. --- changelog.md | 2 + mycli/packages/completion_engine.py | 15 +- mycli/sqlcompleter.py | 148 ++++++++++--- ...est_smart_completion_public_schema_only.py | 206 ++++++++++++++++++ 4 files changed, 337 insertions(+), 34 deletions(-) diff --git a/changelog.md b/changelog.md index d60f17df..6320a1aa 100644 --- a/changelog.md +++ b/changelog.md @@ -4,6 +4,7 @@ Upcoming (TBD) Features --------- * `--checkup` now checks for external executables. +* Improve completion suggestions within backticks. Bug Fixes @@ -12,6 +13,7 @@ Bug Fixes * Don't diagnose free-entry sections such as `[favorite_queries]` in `--checkup`. * When accepting a filename completion, fill in leading `./` if given. + Internal -------- * Bump `cli_helpers` to non-yanked version. diff --git a/mycli/packages/completion_engine.py b/mycli/packages/completion_engine.py index ccc890ec..6e6a5103 100644 --- a/mycli/packages/completion_engine.py +++ b/mycli/packages/completion_engine.py @@ -45,8 +45,12 @@ def _is_where_or_having(token: Token | None) -> bool: def _find_doubled_backticks(text: str) -> list[int]: length = len(text) - doubled_backticks: list[int] = [] + doubled_backtick_positions: list[int] = [] backtick = '`' + two_backticks = backtick + backtick + + if two_backticks not in text: + return doubled_backtick_positions for index in range(0, length): ch = text[index] @@ -54,13 +58,13 @@ def _find_doubled_backticks(text: str) -> list[int]: index += 1 continue if index + 1 < length and text[index + 1] == backtick: - doubled_backticks.append(index) - doubled_backticks.append(index + 1) + doubled_backtick_positions.append(index) + doubled_backtick_positions.append(index + 1) index += 2 continue index += 1 - return doubled_backticks + return doubled_backtick_positions @functools.lru_cache(maxsize=128) @@ -76,8 +80,7 @@ def is_inside_quotes(text: str, pos: int) -> 
Literal[False, 'single', 'double', backslash = '\\' # scanning the string twice seems to be needed to handle doubled backticks - if backtick in text: - doubled_backtick_positions = _find_doubled_backticks(text) + doubled_backtick_positions = _find_doubled_backticks(text) length = len(text) if pos < 0: diff --git a/mycli/sqlcompleter.py b/mycli/sqlcompleter.py index c9f85162..de618c2f 100644 --- a/mycli/sqlcompleter.py +++ b/mycli/sqlcompleter.py @@ -11,7 +11,7 @@ from pygments.lexers._mysql_builtins import MYSQL_DATATYPES, MYSQL_FUNCTIONS, MYSQL_KEYWORDS import rapidfuzz -from mycli.packages.completion_engine import suggest_type +from mycli.packages.completion_engine import is_inside_quotes, suggest_type from mycli.packages.filepaths import complete_path, parse_path, suggest_path from mycli.packages.parseutils import extract_columns_from_select, last_word from mycli.packages.special import llm @@ -810,13 +810,6 @@ def escape_name(self, name: str) -> str: return name - def unescape_name(self, name: str) -> str: - """Unquote a string.""" - if name and name[0] == '"' and name[-1] == '"': - name = name[1:-1] - - return name - def escaped_names(self, names: Collection[str]) -> list[str]: return [self.escape_name(name) for name in names] @@ -974,6 +967,7 @@ def find_matches( start_only: bool = False, fuzzy: bool = True, casing: str | None = None, + text_before_cursor: str = '', ) -> Generator[tuple[str, int], None, None]: """Find completion matches for the given text. 
@@ -995,13 +989,26 @@ def find_matches( completions: list[tuple[str, int]] = [] + def maybe_quote_identifier(item: str) -> str: + if item.startswith('`'): + return item + if item == '*': + return item + return '`' + item + '`' + + # checking text.startswith() first is an optimization; is_inside_quotes() covers more cases + if text.startswith('`') or is_inside_quotes(text_before_cursor, len(text_before_cursor)) == 'backtick': + quoted_collection: Collection[Any] = [maybe_quote_identifier(x) if isinstance(x, str) else x for x in collection] + else: + quoted_collection = collection + if fuzzy: regex = ".{0,3}?".join(map(re.escape, text)) pat = re.compile(f'({regex})') under_words_text = [x for x in text.split('_') if x] case_words_text = re.split(case_change_pat, last) - for item in collection: + for item in quoted_collection: r = pat.search(item.lower()) if r: completions.append((item, Fuzziness.REGEX)) @@ -1032,7 +1039,7 @@ def find_matches( if len(text) >= 4: rapidfuzz_matches = rapidfuzz.process.extract( text, - collection, + quoted_collection, scorer=rapidfuzz.fuzz.WRatio, # todo: maybe make our own processor which only does case-folding # because underscores are valuable info @@ -1050,7 +1057,7 @@ def find_matches( else: match_end_limit = len(text) if start_only else None - for item in collection: + for item in quoted_collection: match_point = item.lower().find(text, 0, match_end_limit) if match_point >= 0: completions.append((item, Fuzziness.PERFECT)) @@ -1083,7 +1090,13 @@ def get_completions( # If smart_completion is off then match any word that starts with # 'word_before_cursor'. 
if not smart_completion: - matches = self.find_matches(word_before_cursor, self.all_completions, start_only=True, fuzzy=False) + matches = self.find_matches( + word_before_cursor, + self.all_completions, + start_only=True, + fuzzy=False, + text_before_cursor=document.text_before_cursor, + ) return (Completion(x[0], -len(text_for_len)) for x in matches) completions: list[tuple[str, int, int]] = [] @@ -1110,13 +1123,21 @@ def get_completions( # showing all columns. So make them unique and sort them. scoped_cols = sorted(set(scoped_cols), key=lambda s: s.strip('`')) - cols = self.find_matches(word_before_cursor, scoped_cols) + cols = self.find_matches( + word_before_cursor, + scoped_cols, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in cols]) elif suggestion["type"] == "function": # suggest user-defined functions using substring matching funcs = self.populate_schema_objects(suggestion["schema"], "functions") - user_funcs = self.find_matches(word_before_cursor, funcs) + user_funcs = self.find_matches( + word_before_cursor, + funcs, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in user_funcs]) # suggest hardcoded functions using startswith matching only if @@ -1125,13 +1146,22 @@ def get_completions( # eg: SELECT * FROM users u WHERE u. 
if not suggestion["schema"]: predefined_funcs = self.find_matches( - word_before_cursor, self.functions, start_only=True, fuzzy=False, casing=self.keyword_casing + word_before_cursor, + self.functions, + start_only=True, + fuzzy=False, + casing=self.keyword_casing, + text_before_cursor=document.text_before_cursor, ) completions.extend([(*x, rank) for x in predefined_funcs]) elif suggestion["type"] == "procedure": procs = self.populate_schema_objects(suggestion["schema"], "procedures") - procs_m = self.find_matches(word_before_cursor, procs) + procs_m = self.find_matches( + word_before_cursor, + procs, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in procs_m]) elif suggestion["type"] == "table": @@ -1144,53 +1174,107 @@ def get_completions( tables = self.populate_schema_objects(suggestion["schema"], "tables", columns) else: tables = self.populate_schema_objects(suggestion["schema"], "tables") - tables_m = self.find_matches(word_before_cursor, tables) + tables_m = self.find_matches( + word_before_cursor, + tables, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in tables_m]) elif suggestion["type"] == "view": views = self.populate_schema_objects(suggestion["schema"], "views") - views_m = self.find_matches(word_before_cursor, views) + views_m = self.find_matches( + word_before_cursor, + views, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in views_m]) elif suggestion["type"] == "alias": aliases = suggestion["aliases"] - aliases_m = self.find_matches(word_before_cursor, aliases) + aliases_m = self.find_matches( + word_before_cursor, + aliases, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in aliases_m]) elif suggestion["type"] == "database": - dbs_m = self.find_matches(word_before_cursor, self.databases) + dbs_m = self.find_matches( + word_before_cursor, + self.databases, + 
text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in dbs_m]) elif suggestion["type"] == "keyword": - keywords_m = self.find_matches(word_before_cursor, self.keywords, casing=self.keyword_casing) + keywords_m = self.find_matches( + word_before_cursor, + self.keywords, + casing=self.keyword_casing, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in keywords_m]) elif suggestion["type"] == "show": show_items_m = self.find_matches( - word_before_cursor, self.show_items, start_only=False, fuzzy=True, casing=self.keyword_casing + word_before_cursor, + self.show_items, + start_only=False, + fuzzy=True, + casing=self.keyword_casing, + text_before_cursor=document.text_before_cursor, ) completions.extend([(*x, rank) for x in show_items_m]) elif suggestion["type"] == "change": - change_items_m = self.find_matches(word_before_cursor, self.change_items, start_only=False, fuzzy=True) + change_items_m = self.find_matches( + word_before_cursor, + self.change_items, + start_only=False, + fuzzy=True, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in change_items_m]) elif suggestion["type"] == "user": - users_m = self.find_matches(word_before_cursor, self.users, start_only=False, fuzzy=True) + users_m = self.find_matches( + word_before_cursor, + self.users, + start_only=False, + fuzzy=True, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in users_m]) elif suggestion["type"] == "special": - special_m = self.find_matches(word_before_cursor, self.special_commands, start_only=True, fuzzy=False) + special_m = self.find_matches( + word_before_cursor, + self.special_commands, + start_only=True, + fuzzy=False, + text_before_cursor=document.text_before_cursor, + ) # specials are special, and go early in the candidates, first if possible completions.extend([(*x, 0) for x in special_m]) elif suggestion["type"] == "favoritequery": 
if hasattr(FavoriteQueries, 'instance') and hasattr(FavoriteQueries.instance, 'list'): - queries_m = self.find_matches(word_before_cursor, FavoriteQueries.instance.list(), start_only=False, fuzzy=True) + queries_m = self.find_matches( + word_before_cursor, + FavoriteQueries.instance.list(), + start_only=False, + fuzzy=True, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in queries_m]) elif suggestion["type"] == "table_format": - formats_m = self.find_matches(word_before_cursor, self.table_formats) + formats_m = self.find_matches( + word_before_cursor, + self.table_formats, + text_before_cursor=document.text_before_cursor, + ) completions.extend([(*x, rank) for x in formats_m]) elif suggestion["type"] == "file_name": @@ -1210,6 +1294,7 @@ def get_completions( possible_entries, start_only=False, fuzzy=True, + text_before_cursor=document.text_before_cursor, ) completions.extend([(*x, rank) for x in subcommands_m]) elif suggestion["type"] == "enum_value": @@ -1220,7 +1305,14 @@ def get_completions( ) if enum_values: quoted_values = [self._quote_sql_string(value) for value in enum_values] - completions = [(*x, rank) for x in self.find_matches(word_before_cursor, quoted_values)] + completions = [ + (*x, rank) + for x in self.find_matches( + word_before_cursor, + quoted_values, + text_before_cursor=document.text_before_cursor, + ) + ] break def completion_sort_key(item: tuple[str, int, int], text_for_len: str): diff --git a/test/test_smart_completion_public_schema_only.py b/test/test_smart_completion_public_schema_only.py index 3c6521ed..6dad48e5 100644 --- a/test/test_smart_completion_public_schema_only.py +++ b/test/test_smart_completion_public_schema_only.py @@ -752,3 +752,209 @@ def test_string_no_completion_spaces_inner_2(completer, complete_event): position = len('select "json ') result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) assert result == [] + + +def 
test_backticked_column_completion(completer, complete_event): + text = 'select `Tim' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + # todo it would be nicer if the column names sorted to the top + Completion(text='`time`', start_position=-4), + Completion(text='`timediff`', start_position=-4), + Completion(text='`timestamp`', start_position=-4), + Completion(text='`time_format`', start_position=-4), + Completion(text='`time_to_sec`', start_position=-4), + Completion(text='`Time_zone_id`', start_position=-4), + Completion(text='`timestampadd`', start_position=-4), + Completion(text='`timestampdiff`', start_position=-4), + Completion(text='`datetime`', start_position=-4), + Completion(text='`optimize`', start_position=-4), + Completion(text='`optimizer_costs`', start_position=-4), + Completion(text='`utc_time`', start_position=-4), + Completion(text='`utc_timestamp`', start_position=-4), + Completion(text='`current_time`', start_position=-4), + Completion(text='`current_timestamp`', start_position=-4), + Completion(text='`localtime`', start_position=-4), + Completion(text='`localtimestamp`', start_position=-4), + Completion(text='`password_lock_time`', start_position=-4), + ] + + +def test_backticked_column_completion_component(completer, complete_event): + text = 'select `com' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + # todo it would be nicer if "comment" sorted to the top because it is a column name, + # and because it is a reserved word + Completion(text='`commit`', start_position=-4), + Completion(text='`comment`', start_position=-4), + Completion(text='`compact`', start_position=-4), + Completion(text='`compress`', start_position=-4), + Completion(text='`committed`', start_position=-4), + Completion(text='`component`', start_position=-4), + 
Completion(text='`completion`', start_position=-4), + Completion(text='`compressed`', start_position=-4), + Completion(text='`compression`', start_position=-4), + Completion(text='`column`', start_position=-4), + Completion(text='`column_format`', start_position=-4), + Completion(text='`column_name`', start_position=-4), + Completion(text='`columns`', start_position=-4), + Completion(text='`second_microsecond`', start_position=-4), + Completion(text='`uncommitted`', start_position=-4), + ] + + +def test_backticked_column_completion_two_character(completer, complete_event): + text = 'select `f' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + # todo it would be nicer if the column name "first_name" sorted to the top + Completion(text='`for`', start_position=-2), + Completion(text='`from`', start_position=-2), + Completion(text='`fast`', start_position=-2), + Completion(text='`file`', start_position=-2), + Completion(text='`full`', start_position=-2), + Completion(text='`field`', start_position=-2), + Completion(text='`floor`', start_position=-2), + Completion(text='`fixed`', start_position=-2), + Completion(text='`float`', start_position=-2), + Completion(text='`false`', start_position=-2), + Completion(text='`fetch`', start_position=-2), + Completion(text='`first`', start_position=-2), + Completion(text='`flush`', start_position=-2), + Completion(text='`force`', start_position=-2), + Completion(text='`found`', start_position=-2), + Completion(text='`float4`', start_position=-2), + Completion(text='`float8`', start_position=-2), + Completion(text='`factor`', start_position=-2), + Completion(text='`faults`', start_position=-2), + Completion(text='`fields`', start_position=-2), + Completion(text='`filter`', start_position=-2), + Completion(text='`finish`', start_position=-2), + Completion(text='`format`', start_position=-2), + Completion(text='`follows`', 
start_position=-2), + Completion(text='`foreign`', start_position=-2), + Completion(text='`fulltext`', start_position=-2), + Completion(text='`function`', start_position=-2), + Completion(text='`from_days`', start_position=-2), + Completion(text='`following`', start_position=-2), + Completion(text='`first_name`', start_position=-2), + Completion(text='`found_rows`', start_position=-2), + Completion(text='`find_in_set`', start_position=-2), + Completion(text='`from_base64`', start_position=-2), + Completion(text='`first_value`', start_position=-2), + Completion(text='`foreign key`', start_position=-2), + Completion(text='`format_bytes`', start_position=-2), + Completion(text='`from_unixtime`', start_position=-2), + Completion(text='`file_block_size`', start_position=-2), + Completion(text='`format_pico_time`', start_position=-2), + Completion(text='`failed_login_attempts`', start_position=-2), + Completion(text='`left join`', start_position=-2), + Completion(text='`after`', start_position=-2), + Completion(text='`before`', start_position=-2), + Completion(text='`default`', start_position=-2), + Completion(text='`default_auth`', start_position=-2), + Completion(text='`definer`', start_position=-2), + Completion(text='`definition`', start_position=-2), + Completion(text='`enforced`', start_position=-2), + Completion(text='`if`', start_position=-2), + Completion(text='`infile`', start_position=-2), + Completion(text='`left`', start_position=-2), + Completion(text='`logfile`', start_position=-2), + Completion(text='`of`', start_position=-2), + Completion(text='`off`', start_position=-2), + Completion(text='`offset`', start_position=-2), + Completion(text='`outfile`', start_position=-2), + Completion(text='`profile`', start_position=-2), + Completion(text='`profiles`', start_position=-2), + Completion(text='`reference`', start_position=-2), + Completion(text='`references`', start_position=-2), + ] + + +def test_backticked_column_completion_three_character(completer, 
complete_event): + text = 'select `fi' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + # todo it would be nicer if the column name "first_name" sorted to the top + Completion(text='`file`', start_position=-3), + Completion(text='`field`', start_position=-3), + Completion(text='`fixed`', start_position=-3), + Completion(text='`first`', start_position=-3), + Completion(text='`fields`', start_position=-3), + Completion(text='`filter`', start_position=-3), + Completion(text='`finish`', start_position=-3), + Completion(text='`first_name`', start_position=-3), + Completion(text='`find_in_set`', start_position=-3), + Completion(text='`first_value`', start_position=-3), + Completion(text='`file_block_size`', start_position=-3), + Completion(text='`definer`', start_position=-3), + Completion(text='`definition`', start_position=-3), + Completion(text='`failed_login_attempts`', start_position=-3), + Completion(text='`foreign`', start_position=-3), + Completion(text='`infile`', start_position=-3), + Completion(text='`logfile`', start_position=-3), + Completion(text='`outfile`', start_position=-3), + Completion(text='`profile`', start_position=-3), + Completion(text='`profiles`', start_position=-3), + Completion(text='`foreign key`', start_position=-3), + ] + + +def test_backticked_column_completion_four_character(completer, complete_event): + text = 'select `fir' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + # todo it would be nicer if the column name "first_name" sorted to the top + Completion(text='`first`', start_position=-4), + Completion(text='`first_name`', start_position=-4), + Completion(text='`first_value`', start_position=-4), + Completion(text='`definer`', start_position=-4), + Completion(text='`filter`', start_position=-4), + ] + + +def 
test_backticked_table_completion_required(completer, complete_event): + text = 'select ABC from `rév' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + Completion(text='`réveillé`', start_position=-4), + ] + + +def test_backticked_table_completion_not_required(completer, complete_event): + text = 'select * from `t' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [ + Completion(text='`test`', start_position=-2), + Completion(text='`test 2`', start_position=-2), + Completion(text='`time_zone`', start_position=-2), + Completion(text='`time_zone_name`', start_position=-2), + Completion(text='`time_zone_transition`', start_position=-2), + Completion(text='`time_zone_leap_second`', start_position=-2), + Completion(text='`time_zone_transition_type`', start_position=-2), + ] + + +def test_string_no_completion_backtick(completer, complete_event): + text = 'select * from "`t' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == [] + + +# todo this shouldn't suggest anything but the space resets the logic +# and it completes on "bar" alone +@pytest.mark.xfail +def test_backticked_no_completion_spaces(completer, complete_event): + text = 'select * from `nocomplete bar' + position = len(text) + result = list(completer.get_completions(Document(text=text, cursor_position=position), complete_event)) + assert result == []