Skip to content

Commit 0ac457e

Browse files
committed
IndentationInfo.from_content: fix corner case of a single indent count
1 parent dfecc9e commit 0ac457e

File tree

1 file changed

+30
-22
lines changed

1 file changed

+30
-22
lines changed

src/text_manipulation/indentation_kit.py

Lines changed: 30 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -194,28 +194,9 @@ def from_content(cls, content: str | Sequence[str]) -> 'IndentationInfo':
194194

195195
indent_lengths = [len(indent) for indent in indentations]
196196

197-
if dominant_char == '\t':
198-
char_count = 1
199-
else:
200-
# For spaces, determine the most likely char_count
201-
space_counts = [sc for sc in indent_lengths if sc % 2 == 0]
202-
if not space_counts:
203-
char_count = 2 # Default to 2 if no even space counts
204-
else:
205-
unique_space_counts = sorted(set(space_counts))
206-
deltas = sorted([b - a for a, b in zip(unique_space_counts, unique_space_counts[1:])], reverse=True)
207-
most_common_deltas = Counter(deltas).most_common(5)
208-
ratio_most_common = most_common_deltas[0][1] / len(deltas)
209-
if ratio_most_common > .6:
210-
char_count = most_common_deltas[0][0]
211-
else:
212-
char_count = deltas[0]
213-
# find the largest GCD
214-
for i in range(1, len(most_common_deltas)):
215-
new_gcd = gcd(char_count, most_common_deltas[i][0])
216-
if new_gcd <= 1:
217-
break
218-
char_count = new_gcd
197+
char_count = 1
198+
if dominant_char != '\t':
199+
char_count = cls.calc_space_count_for_indent(indent_lengths)
219200

220201
min_indent_chars = 0 if has_zero_indent else min(indent_lengths) if indent_lengths else 0
221202
min_indent_level = min_indent_chars // char_count
@@ -234,6 +215,33 @@ def from_content(cls, content: str | Sequence[str]) -> 'IndentationInfo':
234215

235216
return cls(char_count, dominant_char, min_indent_level, consistency, message)
236217

218+
@staticmethod
219+
def calc_space_count_for_indent(indent_lengths: Sequence[int]) -> int:
220+
# For spaces, determine the most likely char_count
221+
space_counts = [sc for sc in indent_lengths if sc % 2 == 0]
222+
if not space_counts:
223+
return 2 # Default to 2 if no even space counts
224+
225+
unique_space_counts = sorted(set(space_counts))
226+
if len(unique_space_counts) == 1:
227+
return unique_space_counts[0]
228+
229+
deltas = sorted([b - a for a, b in zip(unique_space_counts, unique_space_counts[1:])], reverse=True)
230+
most_common_deltas = Counter(deltas).most_common(5)
231+
ratio_most_common = most_common_deltas[0][1] / len(deltas)
232+
if ratio_most_common > .6:
233+
return most_common_deltas[0][0]
234+
235+
# Resort to GCD
236+
result = deltas[0]
237+
# find the largest GCD
238+
for i in range(1, len(most_common_deltas)):
239+
new_gcd = gcd(result, most_common_deltas[i][0])
240+
if new_gcd <= 1:
241+
break
242+
result = new_gcd
243+
return result
244+
237245
def update_min_indent_level(self, content: str | Sequence[str]) -> 'IndentationInfo':
238246
return self._replace(min_indent_level=IndentationInfo.from_content(content).min_indent_level)
239247

0 commit comments

Comments
 (0)