From 64550f79fa30719491a1a84719f8c2fb9cb81653 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Mon, 17 Mar 2025 14:20:11 +0100 Subject: [PATCH 1/2] Fix handling files with unicode names or contents This adds the `git ls-files` which instructs git to output files separated by `\0`, and otherwise output filenames verbatim without any special encoding. The second fix is related to enforcing `utf-8` encoding when reading in file contents in order to produce "file fixes". --- codecov_cli/helpers/versioning_systems.py | 12 ++---------- codecov_cli/services/upload/upload_collector.py | 2 +- ...0\320\276\320\273\320\273\320\265\321\200.php" | 5 +++++ tests/helpers/test_versioning_systems.py | 15 ++++++++++++--- tests/services/upload/test_upload_collector.py | 9 +++++++++ 5 files changed, 29 insertions(+), 14 deletions(-) create mode 100644 "tests/data/\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200\321\213/\320\237\320\276\320\273\321\214\320\267\320\276\320\262\320\260\321\202\320\265\320\273\321\214/\320\223\320\273\320\260\320\262\320\275\321\213\320\271\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200.php" diff --git a/codecov_cli/helpers/versioning_systems.py b/codecov_cli/helpers/versioning_systems.py index ff31c0506..3d204ba91 100644 --- a/codecov_cli/helpers/versioning_systems.py +++ b/codecov_cli/helpers/versioning_systems.py @@ -168,19 +168,11 @@ def list_relevant_files( if dir_to_use is None: raise ValueError("Can't determine root folder") - cmd = ["git", "-C", str(dir_to_use), "ls-files"] + cmd = ["git", "-C", str(dir_to_use), "ls-files", "-z"] if recurse_submodules: cmd.append("--recurse-submodules") res = subprocess.run(cmd, capture_output=True) - - return [ - ( - filename[1:-1] - if filename.startswith('"') and filename.endswith('"') - else filename - ) - for filename in res.stdout.decode("unicode_escape").strip().split("\n") - ] + return res.stdout.decode().split("\0") class NoVersioningSystem(VersioningSystemInterface): diff --git a/codecov_cli/services/upload/upload_collector.py b/codecov_cli/services/upload/upload_collector.py index 5ca7c0af7..22310e2f4 100644 --- a/codecov_cli/services/upload/upload_collector.py +++ b/codecov_cli/services/upload/upload_collector.py @@ -115,7 +115,7 @@ def _get_file_fixes( eof = None try: - with open(filename, "r") as f: + with open(filename, "r", encoding="utf-8") as f: # If lineno is unset that means that the # file is empty thus the eof should be 0 # so lineno will be set to -1 here diff --git "a/tests/data/\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200\321\213/\320\237\320\276\320\273\321\214\320\267\320\276\320\262\320\260\321\202\320\265\320\273\321\214/\320\223\320\273\320\260\320\262\320\275\321\213\320\271\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200.php" "b/tests/data/\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200\321\213/\320\237\320\276\320\273\321\214\320\267\320\276\320\262\320\260\321\202\320\265\320\273\321\214/\320\223\320\273\320\260\320\262\320\275\321\213\320\271\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200.php" new file mode 100644 index 000000000..6d5aea038 --- /dev/null +++ "b/tests/data/\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200\321\213/\320\237\320\276\320\273\321\214\320\267\320\276\320\262\320\260\321\202\320\265\320\273\321\214/\320\223\320\273\320\260\320\262\320\275\321\213\320\271\320\232\320\276\320\275\321\202\321\200\320\276\320\273\320\273\320\265\321\200.php" @@ -0,0 +1,5 @@ +// See +// Plus, add a bunch of unicode chars in order to trigger +// + +// µ, ¹², ‘’“”, őá…–🤮🚀¿ 한글 테스트 \ No newline at end of file diff --git a/tests/helpers/test_versioning_systems.py b/tests/helpers/test_versioning_systems.py index f3155cab4..a8d7b63b8 100644 --- a/tests/helpers/test_versioning_systems.py +++ b/tests/helpers/test_versioning_systems.py @@ -105,8 +105,7 @@ def test_list_relevant_files_returns_correct_network_files(self, mocker, tmp_pat "codecov_cli.helpers.versioning_systems.subprocess.run", return_value=mocked_subprocess, ) - # git ls-files diplays a single \n as \\\\n - mocked_subprocess.stdout = b'a.txt\nb.txt\n"a\\\\nb.txt"\nc.txt\nd.txt\n.circleci/config.yml\nLICENSE\napp/advanced calculations/advanced_calculator.js\n' + mocked_subprocess.stdout = b"a.txt\0b.txt\0a\\nb.txt\0c.txt\0d.txt\0.circleci/config.yml\0LICENSE\0app/advanced calculations/advanced_calculator.js" vs = GitVersioningSystem() @@ -138,6 +137,16 @@ def test_list_relevant_files_recurse_submodules(self, mocker, tmp_path): vs = GitVersioningSystem() _ = vs.list_relevant_files(tmp_path, recurse_submodules=True) subproc_run.assert_called_with( - ["git", "-C", str(tmp_path), "ls-files", "--recurse-submodules"], + ["git", "-C", str(tmp_path), "ls-files", "-z", "--recurse-submodules"], capture_output=True, ) + + +def test_exotic_git_filenames(): + vs = GitVersioningSystem() + found_repo_files = vs.list_relevant_files() + + # See + assert ( + "tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php" in found_repo_files + ) diff --git a/tests/services/upload/test_upload_collector.py b/tests/services/upload/test_upload_collector.py index 1d57b3972..fecab426c 100644 --- a/tests/services/upload/test_upload_collector.py +++ b/tests/services/upload/test_upload_collector.py @@ -87,6 +87,15 @@ def test_fix_php_files(): assert fixes_for_php_file.fixed_lines_with_reason == set([]) +def test_can_read_unicode_file(): + col = UploadCollector(None, None, None, None, True) + + php_file = Path("tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php") + _fixes = col._produce_file_fixes([php_file]) + # we just want to assert that this is not throwing an error related to file encoding, + # see + + def test_fix_for_cpp_swift_vala(tmp_path): cpp_file = Path("tests/data/files_to_fix_examples/sample.cpp") From 9ec792485dc6adba9f6200f628bbc82adea0fc51 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Thu, 27 Mar 2025 12:00:23 +0100 Subject: [PATCH 2/2] implement fallback `list_relevant_files` in terms of `search_files` --- codecov_cli/helpers/versioning_systems.py | 25 +++++-------------- tests/helpers/test_versioning_systems.py | 14 ++++++++++- .../services/upload/test_upload_collector.py | 10 -------- 3 files changed, 19 insertions(+), 30 deletions(-) diff --git a/codecov_cli/helpers/versioning_systems.py b/codecov_cli/helpers/versioning_systems.py index 3d204ba91..ffe2292f4 100644 --- a/codecov_cli/helpers/versioning_systems.py +++ b/codecov_cli/helpers/versioning_systems.py @@ -1,11 +1,13 @@ from itertools import chain import logging +import re import subprocess import typing as t from pathlib import Path from shutil import which from codecov_cli.fallbacks import FallbackFieldEnum +from codecov_cli.helpers.folder_searcher import search_files from codecov_cli.helpers.git import parse_git_service, parse_slug from abc import ABC, abstractmethod @@ -193,22 +195,7 @@ def list_relevant_files( if dir_to_use is None: raise ValueError("Can't determine root folder") - cmd = [ - "find", - str(dir_to_use), - *chain.from_iterable( - ["-name", block, "-prune", "-o"] for block in IGNORE_DIRS - ), - *chain.from_iterable( - ["-path", block, "-prune", "-o"] for block in IGNORE_PATHS - ), - "-type", - "f", - "-print", - ] - res = subprocess.run(cmd, capture_output=True) - return [ - filename - for filename in res.stdout.decode("unicode_escape").strip().split("\n") - if filename - ] + files = search_files( + dir_to_use, folders_to_ignore=[], filename_include_regex=re.compile("") + ) + return [f.relative_to(dir_to_use).as_posix() for f in files] diff --git a/tests/helpers/test_versioning_systems.py b/tests/helpers/test_versioning_systems.py index a8d7b63b8..2437be897 100644 --- a/tests/helpers/test_versioning_systems.py +++ b/tests/helpers/test_versioning_systems.py @@ -3,7 +3,10 @@ import pytest from codecov_cli.fallbacks import FallbackFieldEnum -from codecov_cli.helpers.versioning_systems import GitVersioningSystem +from codecov_cli.helpers.versioning_systems import ( + GitVersioningSystem, + NoVersioningSystem, +) class TestGitVersioningSystem(object): @@ -150,3 +153,12 @@ def test_exotic_git_filenames(): assert ( "tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php" in found_repo_files ) + + +def test_exotic_fallback_filenames(): + vs = NoVersioningSystem() + found_repo_files = vs.list_relevant_files() + + assert ( + "tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php" in found_repo_files + ) diff --git a/tests/services/upload/test_upload_collector.py b/tests/services/upload/test_upload_collector.py index fecab426c..1551e8601 100644 --- a/tests/services/upload/test_upload_collector.py +++ b/tests/services/upload/test_upload_collector.py @@ -1,7 +1,5 @@ from pathlib import Path from unittest.mock import patch -import pytest -import sys from codecov_cli.helpers.versioning_systems import ( GitVersioningSystem, @@ -191,10 +189,6 @@ def test_generate_upload_data(tmp_path): @patch("codecov_cli.services.upload.upload_collector.logger") -@pytest.mark.skipif( - sys.platform == "win32", - reason="the fallback `list_relevant_files` is currently broken on windows", -) def test_generate_upload_data_with_none_network(mock_logger, tmp_path): (tmp_path / "coverage.xml").touch() @@ -215,10 +209,6 @@ def test_generate_upload_data_with_none_network(mock_logger, tmp_path): assert len(res.file_fixes) > 1 -@pytest.mark.skipif( - sys.platform == "win32", - reason="the fallback `list_relevant_files` is currently broken on windows", -) def test_generate_network_with_no_versioning_system(tmp_path): versioning_system = NoVersioningSystem() found_files = versioning_system.list_relevant_files()