Skip to content

Commit 7ee3465

Browse files
CM-57848-Fix UTF encoding when displaying code snippet
1 parent 3c73e3d commit 7ee3465

File tree

5 files changed

+103
-3
lines changed

5 files changed

+103
-3
lines changed

cycode/cli/printers/tables/table_printer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from cycode.cli.printers.tables.table_printer_base import TablePrinterBase
99
from cycode.cli.printers.utils import is_git_diff_based_scan
1010
from cycode.cli.printers.utils.detection_ordering.common_ordering import sort_and_group_detections_from_scan_result
11-
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
11+
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding
1212

1313
if TYPE_CHECKING:
1414
from cycode.cli.models import LocalScanResult
@@ -96,6 +96,8 @@ def _enrich_table_with_detection_code_segment_values(
9696
if not self.show_secret:
9797
violation = obfuscate_text(violation)
9898

99+
violation = sanitize_text_for_encoding(violation)
100+
99101
table.add_cell(LINE_NUMBER_COLUMN, str(detection_line))
100102
table.add_cell(COLUMN_NUMBER_COLUMN, str(detection_column))
101103
table.add_cell(VIOLATION_LENGTH_COLUMN, f'{violation_length} chars')

cycode/cli/printers/utils/code_snippet_syntax.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from cycode.cli import consts
66
from cycode.cli.console import _SYNTAX_HIGHLIGHT_THEME
77
from cycode.cli.printers.utils import is_git_diff_based_scan
8-
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
8+
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding
99

1010
if TYPE_CHECKING:
1111
from cycode.cli.models import Document
@@ -72,6 +72,7 @@ def _get_code_snippet_syntax_from_file(
7272
code_lines_to_render.append(line_content)
7373

7474
code_to_render = '\n'.join(code_lines_to_render)
75+
code_to_render = sanitize_text_for_encoding(code_to_render)
7576
return _get_syntax_highlighted_code(
7677
code=code_to_render,
7778
lexer=Syntax.guess_lexer(document.path, code=code_to_render),
@@ -94,6 +95,7 @@ def _get_code_snippet_syntax_from_git_diff(
9495
violation = line_content[detection_position_in_line : detection_position_in_line + violation_length]
9596
line_content = line_content.replace(violation, obfuscate_text(violation))
9697

98+
line_content = sanitize_text_for_encoding(line_content)
9799
return _get_syntax_highlighted_code(
98100
code=line_content,
99101
lexer='diff',

cycode/cli/printers/utils/rich_helpers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from rich.panel import Panel
66

77
from cycode.cli.console import console
8+
from cycode.cli.utils.string_utils import sanitize_text_for_encoding
89

910
if TYPE_CHECKING:
1011
from rich.console import RenderableType
@@ -20,8 +21,9 @@ def get_panel(renderable: 'RenderableType', title: str) -> Panel:
2021

2122

2223
def get_markdown_panel(markdown_text: str, title: str) -> Panel:
24+
sanitized_text = sanitize_text_for_encoding(markdown_text.strip())
2325
return get_panel(
24-
Markdown(markdown_text.strip()),
26+
Markdown(sanitized_text),
2527
title=title,
2628
)
2729

cycode/cli/utils/string_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,12 @@ def shortcut_dependency_paths(dependency_paths_list: str) -> str:
6565
result += '\n'
6666

6767
return result.rstrip().rstrip(',')
68+
69+
70+
def sanitize_text_for_encoding(text: str) -> str:
71+
"""Sanitize text by replacing lone surrogates (e.g. from undecodable bytes) with '?' so it encodes as UTF-8.
72+
73+
This prevents encoding errors when Rich tries to display the content, especially on Windows.
74+
Surrogate characters (U+D800 to U+DFFF) cannot be encoded to UTF-8 and will cause errors.
75+
"""
76+
return text.encode('utf-8', errors='replace').decode('utf-8')
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""Tests for Rich encoding fix to handle surrogate characters."""
2+
3+
from io import StringIO
4+
from unittest.mock import MagicMock
5+
6+
from rich.console import Console
7+
8+
from cycode.cli import consts
9+
from cycode.cli.models import Document
10+
from cycode.cli.printers.rich_printer import RichPrinter
11+
from cycode.cyclient.models import Detection
12+
13+
14+
def create_strict_encoding_console() -> tuple[Console, StringIO]:
15+
"""Create a Console that enforces strict UTF-8 encoding, simulating Windows console behavior.
16+
17+
When Rich writes to the console, the file object needs to encode strings to bytes.
18+
With errors='strict' (default for TextIOWrapper), this raises UnicodeEncodeError on surrogates.
19+
This function simulates that behavior to test the encoding fix.
20+
"""
21+
buffer = StringIO()
22+
23+
class StrictEncodingWrapper:
24+
def __init__(self, file_obj: StringIO) -> None:
25+
self._file = file_obj
26+
27+
def write(self, text: str) -> int:
28+
"""Validate encoding before writing to simulate strict encoding behavior."""
29+
text.encode('utf-8')
30+
return self._file.write(text)
31+
32+
def flush(self) -> None:
33+
self._file.flush()
34+
35+
def isatty(self) -> bool:
36+
return False
37+
38+
def __getattr__(self, name: str):
39+
# Delegate all other attributes to the underlying file
40+
return getattr(self._file, name)
41+
42+
strict_file = StrictEncodingWrapper(buffer)
43+
console = Console(file=strict_file, width=80, force_terminal=False)
44+
return console, buffer
45+
46+
47+
def test_rich_printer_handles_surrogate_characters_in_violation_card() -> None:
48+
"""Test that RichPrinter._print_violation_card() handles surrogate characters without errors.
49+
50+
The error occurs in Rich's console._write_buffer() -> write() when console.print() is called.
51+
On Windows with strict encoding, this raises UnicodeEncodeError on surrogates.
52+
"""
53+
surrogate_char = chr(0xDC96)
54+
document_content = 'A' * 1236 + surrogate_char + 'B' * 100
55+
document = Document(
56+
path='test.py',
57+
content=document_content,
58+
is_git_diff_format=False,
59+
)
60+
61+
detection = Detection(
62+
detection_type_id='test-id',
63+
type='test-type',
64+
message='Test message',
65+
detection_details={
66+
'description': 'Summary with ' + surrogate_char + ' surrogate character',
67+
'policy_display_name': 'Test Policy',
68+
'start_position': 1236,
69+
'length': 1,
70+
'line': 0,
71+
},
72+
detection_rule_id='test-rule-id',
73+
severity='Medium',
74+
)
75+
76+
mock_ctx = MagicMock()
77+
mock_ctx.obj = {
78+
'scan_type': consts.SAST_SCAN_TYPE,
79+
'show_secret': False,
80+
}
81+
mock_ctx.info_name = consts.SAST_SCAN_TYPE
82+
83+
console, _ = create_strict_encoding_console()
84+
printer = RichPrinter(mock_ctx, console, console)
85+
printer._print_violation_card(document, detection, 1, 1)

0 commit comments

Comments
 (0)