Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions smart_tests/utils/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,23 @@ def __init__(self, id: str):
'''This is the method in which we parse the user input, so be defensive'''
if id.startswith('@'):
file_path = id[1:]
# Earlier versions of PowerShell write a Unicode BOM when redirecting output to a file.
# https://github.com/PowerShell/PowerShell/issues/8592
# Since we tell people to redirect `record session` output to a file, here we can
# encounter such files. Here's the scheme to cope with this.
#
# First we try utf-8-sig, which handles UTF-8 BOM correctly.
# Our session ID only uses ASCII chars, so unless the writer used a non-ASCII-compatible encoding
# (e.g., EBCDIC, but that is very unlikely), this will read the file correctly.
# If the writer used UTF-16 (e.g., legacy PowerShell on Windows), we'll get a decode error, and
# then we try UTF-16, which handles BOM correctly.
try:
with open(file_path, 'r') as f:
id = f.read().strip()
try:
with open(file_path, 'r', encoding='utf-8-sig') as f:
id = f.read().strip()
except UnicodeDecodeError:
with open(file_path, 'r', encoding='utf-16') as f:
id = f.read().strip()
Comment on lines +47 to +49
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the file isn’t valid UTF-8 or UTF-16 (e.g., latin-1, UTF-16 without BOM, or corrupted), the UTF-16 fallback can raise UnicodeError (often “UTF-16 stream does not start with BOM”), which currently bypasses the outer FileNotFoundError/IOError handling and will surface as an unhandled exception. Consider catching UnicodeError around the decoding attempts and re-raising as BadCmdLineException with a clear message (or iterating through candidate encodings and failing gracefully).

Suggested change
except UnicodeDecodeError:
with open(file_path, 'r', encoding='utf-16') as f:
id = f.read().strip()
except UnicodeError:
try:
with open(file_path, 'r', encoding='utf-16') as f:
id = f.read().strip()
except UnicodeError as e:
raise BadCmdLineException(
f"Session file '{file_path}' is not a valid UTF-8 or UTF-16 text file: {e}"
)

Copilot uses AI. Check for mistakes.
except FileNotFoundError:
raise BadCmdLineException(f"Session file '{file_path}' not found.")
except IOError as e:
Expand Down
41 changes: 41 additions & 0 deletions tests/utils/test_session.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import tempfile
from unittest import mock

import responses
Expand Down Expand Up @@ -46,3 +47,43 @@ def test_get_session(self):
with self.assertRaises(SystemExit) as cm:
get_session(SessionId(self.session), client)
self.assertEqual(cm.exception.code, 1)


class TestSessionId(CliTestCase):
    """Check that SessionId can load its value from an '@file' argument in several encodings."""

    def setUp(self):
        super().setUp()
        # The session ID every file-based test below expects to read back.
        self.valid_session_id = f"builds/{self.build_name}/test_sessions/{self.session_id}"

    def _assert_session_from_file(self, encoding: str, content: str):
        """Write ``content`` to a temp file in ``encoding`` and verify SessionId parses it.

        Args:
            encoding: Codec name used when writing the temporary file.
            content: Text written to the file; expected to decode back to
                ``self.valid_session_id`` once SessionId has read it.
        """
        # delete=False keeps the file on disk after the handle is closed so that
        # SessionId can open it by path; it is removed in the ``finally`` below.
        handle = tempfile.NamedTemporaryFile(mode='w', encoding=encoding, delete=False, suffix='.txt')
        with handle as out:
            out.write(content)
            temp_path = out.name

        try:
            parsed = SessionId(f"@{temp_path}")
            self.assertEqual(str(parsed), self.valid_session_id)
            self.assertEqual(parsed.build_part, self.build_name)
            self.assertEqual(parsed.test_part, self.session_id)
        finally:
            os.unlink(temp_path)

    def test_session_id_from_utf8_file_without_bom(self):
        """A plain UTF-8 file (no BOM) with a trailing newline is read correctly."""
        # The trailing newline is part of the file content written here.
        payload = f"{self.valid_session_id}\n"
        self._assert_session_from_file('utf-8', payload)

    def test_session_id_from_utf8_file_with_bom(self):
        """A UTF-8 file that starts with the UTF-8 signature (BOM) is read correctly."""
        payload = self.valid_session_id
        self._assert_session_from_file('utf-8-sig', payload)

    def test_session_id_from_utf16_le_file(self):
        """A UTF-16 LE file with an explicit BOM is read correctly."""
        # 'utf-16-le' does not emit a BOM by itself, so one is prepended by hand.
        payload = f'\ufeff{self.valid_session_id}'
        self._assert_session_from_file('utf-16-le', payload)

    def test_session_id_from_utf16_file(self):
        """A file written with the generic 'utf-16' codec is read correctly."""
        payload = self.valid_session_id
        self._assert_session_from_file('utf-16', payload)
Loading