From 60ca25c9148dc11c86c94b0765cccdfef11f67a1 Mon Sep 17 00:00:00 2001 From: TheLeoP Date: Sun, 28 Dec 2025 22:31:22 -0500 Subject: [PATCH] fix(ALL): improve `vim.str_utfindex` and `vim.str_byteindex` usage --- lua/mini/align.lua | 5 ++++- lua/mini/completion.lua | 5 ++++- lua/mini/diff.lua | 5 ++++- lua/mini/icons.lua | 5 ++++- lua/mini/jump2d.lua | 10 ++++++++-- lua/mini/map.lua | 11 +++++------ lua/mini/operators.lua | 10 ++++++++-- lua/mini/pick.lua | 10 ++++++++-- lua/mini/surround.lua | 10 ++++++++-- 9 files changed, 53 insertions(+), 18 deletions(-) diff --git a/lua/mini/align.lua b/lua/mini/align.lua index f7b2dd37..f299c601 100644 --- a/lua/mini/align.lua +++ b/lua/mini/align.lua @@ -2031,9 +2031,12 @@ if vim.fn.has('nvim-0.10') == 0 then end end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + H.str_utf_end = function(s, n) return n >= s:len() and 0 or vim.str_utf_end(s, n) end if vim.fn.has('nvim-0.10') == 0 then - H.str_utf_end = function(s, n) return n >= s:len() and 0 or (vim.str_byteindex(s, vim.str_utfindex(s, n)) - n) end + H.str_utf_end = function(s, n) return n >= s:len() and 0 or (vim.str_byteindex(s, H.str_utfindex(s, n)) - n) end end H.is_any_point_inside_any_span = function(points, spans) diff --git a/lua/mini/completion.lua b/lua/mini/completion.lua index f2ef407f..ec81e1c4 100644 --- a/lua/mini/completion.lua +++ b/lua/mini/completion.lua @@ -1917,6 +1917,9 @@ H.fit_to_width = function(text, width) return t_width <= width and text or ('…' .. vim.fn.strcharpart(text, t_width - width + 1, width - 1)) end +H.str_byteindex = function(s, i) return vim.str_byteindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_byteindex = function(s, i) return vim.str_byteindex(s, i) end end + -- Simulate splitting single line `l` like how it would look inside window with -- `wrap` and `linebreak` set to `true` H.wrap_line = function(l, width) @@ -1928,7 +1931,7 @@ H.wrap_line = function(l, width) -- Simulate wrap by looking at breaking character from end of current break -- Use `pcall()` to handle complicated multibyte characters (like Chinese) -- for which even `strdisplaywidth()` seems to return incorrect values. - success, width_id = pcall(vim.str_byteindex, l, width) + success, width_id = pcall(H.str_byteindex, l, width) if success then local break_match = vim.fn.match(l:sub(1, width_id):reverse(), '[- \t.,;:!?]') diff --git a/lua/mini/diff.lua b/lua/mini/diff.lua index 42a25fb6..fddf111e 100644 --- a/lua/mini/diff.lua +++ b/lua/mini/diff.lua @@ -1488,12 +1488,15 @@ H.compute_worddiff_changed_parts = function(ref_line, buf_line) return ref_ranges, buf_ranges end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + H.slice_line = function(line) -- Intertwine every proper character with '\n' local line_len = line:len() local sliced, starts, ends -- Make short route for a very common case of no multibyte characters - if vim.str_utfindex(line) == line_len then + if H.str_utfindex(line) == line_len then sliced, starts, ends = line:gsub('(.)', '%1\n'), {}, {} for i = 1, string.len(line) do starts[i], ends[i] = i, i diff --git a/lua/mini/icons.lua b/lua/mini/icons.lua index 26f61ccb..03793f8e 100644 --- a/lua/mini/icons.lua +++ b/lua/mini/icons.lua @@ -2132,10 +2132,13 @@ H.get_impl = { os = function(name) return H.os_icons[name] end, } +H.str_byteindex = function(s, i) return vim.str_byteindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_byteindex = function(s, i) return vim.str_byteindex(s, i) end end + H.style_icon = function(glyph, name) if MiniIcons.config.style ~= 'ascii' then return glyph end -- Use `vim.str_byteindex()` and `vim.fn.toupper()` for multibyte characters - return vim.fn.toupper(name:sub(1, vim.str_byteindex(name, 1))) + return vim.fn.toupper(name:sub(1, H.str_byteindex(name, 1))) end H.filetype_match = function(filename) diff --git a/lua/mini/jump2d.lua b/lua/mini/jump2d.lua index 633cb11d..e03e6b92 100644 --- a/lua/mini/jump2d.lua +++ b/lua/mini/jump2d.lua @@ -478,8 +478,8 @@ MiniJump2d.gen_spotter.pattern = function(pattern, side) -- Unify how spot is chosen in case of multibyte characters -- Use `+-1` to make sure that result is at start of multibyte character - local utf_index = vim.str_utfindex(line, spot) - 1 - spot = vim.str_byteindex(line, utf_index) + 1 + local utf_index = H.str_utfindex(line, spot) - 1 + spot = H.str_byteindex(line, utf_index) + 1 -- Add spot only if it referces new actually visible column if spot ~= res[#res] then table.insert(res, spot) end @@ -1226,4 +1226,10 @@ H.merge_unique = function(tbl_1, tbl_2) return res end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + +H.str_byteindex = function(s, i) return vim.str_byteindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_byteindex = function(s, i) return vim.str_byteindex(s, i) end end + return MiniJump2d diff --git a/lua/mini/map.lua b/lua/mini/map.lua index 8a945f77..e0a70862 100644 --- a/lua/mini/map.lua +++ b/lua/mini/map.lua @@ -1159,7 +1159,7 @@ H.mask_from_strings = function(strings, _) local mask_row = H.tbl_repeat(true, n_cols) -- Detect whitespace - s_ext:gsub('()%s', function(j) mask_row[vim.str_utfindex(s_ext, j)] = false end) + s_ext:gsub('()%s', function(j) mask_row[H.str_utfindex(s_ext, j)] = false end) res[i] = mask_row end @@ -1679,11 +1679,10 @@ end H.set_extmark_safely = function(...) pcall(vim.api.nvim_buf_set_extmark, ...) end -H.str_width = function(x) - -- Use first returned value (UTF-32 index, and not UTF-16 one) - local res = vim.str_utfindex(x) - return res -end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + +H.str_width = function(x) return H.str_utfindex(x) end H.tbl_repeat = function(x, n) local res = {} diff --git a/lua/mini/operators.lua b/lua/mini/operators.lua index 1cafa3bf..42007500 100644 --- a/lua/mini/operators.lua +++ b/lua/mini/operators.lua @@ -1211,10 +1211,16 @@ H.get_mark = function(mark_name) return vim.api.nvim_buf_get_mark(0, mark_name) H.set_mark = function(mark_name, mark_data) vim.api.nvim_buf_set_mark(0, mark_name, mark_data[1], mark_data[2], {}) end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + +H.str_byteindex = function(s, i) return vim.str_byteindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_byteindex = function(s, i) return vim.str_byteindex(s, i) end end + H.get_next_char_bytecol = function(markcoords) local line = vim.fn.getline(markcoords[1]) - local utf_index = vim.str_utfindex(line, math.min(line:len(), markcoords[2] + 1)) - return vim.str_byteindex(line, utf_index) + local utf_index = H.str_utfindex(line, math.min(line:len(), markcoords[2] + 1)) + return H.str_byteindex(line, utf_index) end -- Indent --------------------------------------------------------------------- diff --git a/lua/mini/pick.lua b/lua/mini/pick.lua index 803f5d8e..37a8d552 100644 --- a/lua/mini/pick.lua +++ b/lua/mini/pick.lua @@ -3662,10 +3662,16 @@ H.seq_along = function(arr) return res end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + +H.str_byteindex = function(s, i) return vim.str_byteindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_byteindex = function(s, i) return vim.str_byteindex(s, i) end end + H.get_next_char_bytecol = function(line_str, col) if type(line_str) ~= 'string' then return col end - local utf_index = vim.str_utfindex(line_str, math.min(line_str:len(), col)) - return vim.str_byteindex(line_str, utf_index) + local utf_index = H.str_utfindex(line_str, math.min(line_str:len(), col)) + return H.str_byteindex(line_str, utf_index) end H.is_file_text = function(path) diff --git a/lua/mini/surround.lua b/lua/mini/surround.lua index 418b9730..aceeb678 100644 --- a/lua/mini/surround.lua +++ b/lua/mini/surround.lua @@ -1877,6 +1877,12 @@ H.is_point_inside_spans = function(point, spans) return false end +H.str_utfindex = function(s, i) return vim.str_utfindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_utfindex = function(s, i) return (vim.str_utfindex(s, i)) end end + +H.str_byteindex = function(s, i) return vim.str_byteindex(s, 'utf-32', i) end +if vim.fn.has('nvim-0.11') == 0 then H.str_byteindex = function(s, i) return vim.str_byteindex(s, i) end end + -- Work with operator marks --------------------------------------------------- H.get_marks_pos = function(mode) -- Region is inclusive on both ends @@ -1922,10 +1928,10 @@ H.get_marks_pos = function(mode) -- Use `math.min()` because it might lead to 'index out of range' error -- when mark is positioned at the end of line (that extra space which is -- selected when selecting with `v$`) - local utf_index = vim.str_utfindex(line2, math.min(#line2, pos2[2])) + local utf_index = H.str_utfindex(line2, math.min(#line2, pos2[2])) -- This returns the last byte inside character because `vim.str_byteindex()` -- 'rounds upwards to the end of that sequence'. - pos2[2] = vim.str_byteindex(line2, utf_index) + pos2[2] = H.str_byteindex(line2, utf_index) end return {