From 7193a689b2348d8d15fa7ee7ee074b4b54734d54 Mon Sep 17 00:00:00 2001 From: Kohsuke Kawaguchi Date: Mon, 9 Feb 2026 14:23:50 -0800 Subject: [PATCH 1/3] Handle Unicode BOM in the session file Earlier versions of PowerShell write a Unicode BOM at the start of the file when output is redirected with '>', so the session file may begin with one. See: https://github.com/PowerShell/PowerShell/issues/8592 --- smart_tests/utils/session.py | 18 ++++++++++++++-- tests/utils/test_session.py | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/smart_tests/utils/session.py b/smart_tests/utils/session.py index fd9fcf9b6..b5e3369a2 100644 --- a/smart_tests/utils/session.py +++ b/smart_tests/utils/session.py @@ -30,9 +30,23 @@ def __init__(self, id: str): '''This is the method in which we parse the user input, so be defensive''' if id.startswith('@'): file_path = id[1:] + # Earlier versions of PowerShell writes Unicode BOM when redirecting output to a file. + # https://github.com/PowerShell/PowerShell/issues/8592 + # Since we tell people to redirect `record session` output to a file, here we can + # encounter such files. Here's the scheme to cope with this. + # + # First we try utf-8-sig, which handles UTF-8 BOM correctly. + # our session ID only uses ASCII chars, so unless the writer used non ascii compatible encoding + # (e.g., EBCDIC but those are very very unlikely), this will read the file correctly. + # If the writer used UTF-16 (e.g., legacy PowerShell on Windows), we'll get a decode error, an + # then we try UTF-16, which handles BOM correctly. 
try: - with open(file_path, 'r') as f: - id = f.read().strip() + try: + with open(file_path, 'r', encoding='utf-8-sig') as f: + id = f.read().strip() + except UnicodeDecodeError: + with open(file_path, 'r', encoding='utf-16') as f: + id = f.read().strip() except FileNotFoundError: raise BadCmdLineException(f"Session file '{file_path}' not found.") except IOError as e: diff --git a/tests/utils/test_session.py b/tests/utils/test_session.py index 94b9ca7d2..18b5313d9 100644 --- a/tests/utils/test_session.py +++ b/tests/utils/test_session.py @@ -1,4 +1,5 @@ import os +import tempfile from unittest import mock import responses @@ -46,3 +47,43 @@ def test_get_session(self): with self.assertRaises(SystemExit) as cm: get_session(SessionId(self.session), client) self.assertEqual(cm.exception.code, 1) + + +class TestSessionId(CliTestCase): + """Test SessionId initialization and file reading with various encodings""" + + def setUp(self): + super().setUp() + # A valid session ID for testing + self.valid_session_id = f"builds/{self.build_name}/test_sessions/{self.session_id}" + + def _assert_session_from_file(self, encoding: str, content: str): + """Helper method to test reading session ID from a file with specific encoding""" + with tempfile.NamedTemporaryFile(mode='w', encoding=encoding, delete=False, suffix='.txt') as f: + f.write(content) + temp_path = f.name + + try: + session = SessionId(f"@{temp_path}") + self.assertEqual(str(session), self.valid_session_id) + self.assertEqual(session.build_part, self.build_name) + self.assertEqual(session.test_part, self.session_id) + finally: + os.unlink(temp_path) + + def test_session_id_from_utf8_file_without_bom(self): + """Test reading session ID from a UTF-8 file without BOM""" + # also, extra NL + self._assert_session_from_file('utf-8', f"{self.valid_session_id}\n") + + def test_session_id_from_utf8_file_with_bom(self): + """Test reading session ID from a UTF-8 file with BOM (UTF-8 signature)""" + 
self._assert_session_from_file('utf-8-sig', self.valid_session_id) + + def test_session_id_from_utf16_le_file(self): + """Test reading session ID from a UTF-16 LE file with BOM (PowerShell default on Windows)""" + self._assert_session_from_file('utf-16-le', f'\ufeff{self.valid_session_id}') + + def test_session_id_from_utf16_file(self): + """Test reading session ID from a UTF-16 file with BOM (using utf-16 encoding)""" + self._assert_session_from_file('utf-16', self.valid_session_id) From 56a936c82925497daa7be4f07e4cdeb7867f8446 Mon Sep 17 00:00:00 2001 From: Kohsuke Kawaguchi Date: Mon, 9 Feb 2026 14:32:20 -0800 Subject: [PATCH 2/3] Update smart_tests/utils/session.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- smart_tests/utils/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_tests/utils/session.py b/smart_tests/utils/session.py index b5e3369a2..5976d26a0 100644 --- a/smart_tests/utils/session.py +++ b/smart_tests/utils/session.py @@ -38,7 +38,7 @@ def __init__(self, id: str): # First we try utf-8-sig, which handles UTF-8 BOM correctly. # our session ID only uses ASCII chars, so unless the writer used non ascii compatible encoding # (e.g., EBCDIC but those are very very unlikely), this will read the file correctly. - # If the writer used UTF-16 (e.g., legacy PowerShell on Windows), we'll get a decode error, an + # If the writer used UTF-16 (e.g., legacy PowerShell on Windows), we'll get a decode error, and # then we try UTF-16, which handles BOM correctly. 
try: try: From f55a65762c95c89b792151e8fc4806d38e429ae2 Mon Sep 17 00:00:00 2001 From: Kohsuke Kawaguchi Date: Mon, 9 Feb 2026 14:33:09 -0800 Subject: [PATCH 3/3] Update smart_tests/utils/session.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- smart_tests/utils/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_tests/utils/session.py b/smart_tests/utils/session.py index 5976d26a0..67f3ae06c 100644 --- a/smart_tests/utils/session.py +++ b/smart_tests/utils/session.py @@ -30,7 +30,7 @@ def __init__(self, id: str): '''This is the method in which we parse the user input, so be defensive''' if id.startswith('@'): file_path = id[1:] - # Earlier versions of PowerShell writes Unicode BOM when redirecting output to a file. + # Earlier versions of PowerShell write Unicode BOM when redirecting output to a file. # https://github.com/PowerShell/PowerShell/issues/8592 # Since we tell people to redirect `record session` output to a file, here we can # encounter such files. Here's the scheme to cope with this.