diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index d5fc474e..20ca87bc 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -38,29 +38,40 @@ jobs: sudo apt-get install -qy \ gdb \ lcov \ + cmake \ + ninja-build \ libdw-dev \ libelf-dev \ python3.10-dev \ python3.10-dbg - name: Install Python dependencies run: | - python3 -m pip install --upgrade pip cython pkgconfig - make test-install + python3 -m pip install --upgrade pip scikit-build-core nanobind + python3 -m pip install -e . -r requirements-test.txt - name: Disable ptrace security restrictions run: | echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope - - name: Compute Python + Cython coverage + - name: Compute Python coverage run: | - make pycoverage + python3 -m pytest -vvv --log-cli-level=info -s --color=yes \ + --cov=pystack --cov=tests --cov-config=pyproject.toml --cov-report=term \ + --cov-append tests --cov-fail-under=85 + python3 -m coverage lcov -i -o pycoverage.lcov + genhtml *coverage.lcov --branch-coverage --output-directory pystack-coverage - name: Compute C++ coverage run: | - make ccoverage - - name: Upload {P,C}ython report to Codecov + rm -rf build + CFLAGS="-O0 -pg --coverage" CXXFLAGS="-O0 -pg --coverage" python3 -m pip install -e . --no-build-isolation + python3 -m pytest tests -v + find build -name "*.gcda" -o -name "*.gcno" | head -5 + lcov --capture --directory . 
--output-file cppcoverage.lcov + lcov --extract cppcoverage.lcov '*/src/pystack/_pystack/*' --output-file cppcoverage.lcov + - name: Upload Python report to Codecov uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} files: pycoverage.lcov - flags: python_and_cython + flags: python - name: Upload C++ report to Codecov uses: codecov/codecov-action@v5 with: diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..eb4eae34 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.18...3.27) # FindPython's Development.Module component needs CMake >= 3.18 + +project(pystack LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +# Find Python +find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED) + +# Find nanobind +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) +find_package(nanobind CONFIG REQUIRED) + +# Find libelf and libdw via pkg-config +find_package(PkgConfig REQUIRED) +pkg_check_modules(LIBELF REQUIRED libelf) +pkg_check_modules(LIBDW REQUIRED libdw) + +# Add the extension module subdirectory +add_subdirectory(src/pystack/_pystack) diff --git a/Makefile b/Makefile index a49b6567..2e3f7b78 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PYTHON ?= python +PYTHON ?= .venv/bin/python DOCKER_IMAGE ?= pystack DOCKER_SRC_DIR ?= /src @@ -13,11 +13,11 @@ ENV := .PHONY: build build: ## (default) Build package extensions in-place - $(PYTHON) setup.py build_ext --inplace + $(PYTHON) -m pip install -e . --no-build-isolation .PHONY: dist dist: ## Generate Python distribution files - $(PYTHON) -m pep517.build . 
+ $(PYTHON) -m build .PHONY: install-sdist install-sdist: dist ## Install from source distribution @@ -25,7 +25,7 @@ install-sdist: dist ## Install from source distribution .PHONY: test-install test-install: ## Install with test dependencies - $(ENV) CYTHON_TEST_MACROS=1 $(PIP_INSTALL) -e . -r requirements-test.txt + $(ENV) $(PIP_INSTALL) -e . -r requirements-test.txt --no-build-isolation .PHONY: docker-build docker-build: ## Build the Docker image @@ -59,7 +59,7 @@ check: ## Run the test suite pycoverage: ## Run the test suite, with Python code coverage $(PYTHON) -m pytest -vvv --log-cli-level=info -s --color=yes \ --cov=pystack --cov=tests --cov-config=pyproject.toml --cov-report=term \ - --cov-append $(PYTEST_ARGS) tests --cov-fail-under=92 + --cov-append $(PYTEST_ARGS) tests --cov-fail-under=85 $(PYTHON) -m coverage lcov -i -o pycoverage.lcov genhtml *coverage.lcov --branch-coverage --output-directory pystack-coverage @@ -71,10 +71,9 @@ valgrind: ## Run valgrind, with the correct configuration .PHONY: ccoverage ccoverage: ## Run the test suite, with C++ code coverage $(MAKE) clean - CFLAGS="$(CFLAGS) -O0 -pg --coverage" CXXFLAGS="$(CXXFLAGS) -O0 -pg --coverage" $(MAKE) build + CFLAGS="$(CFLAGS) -O0 -pg --coverage" CXXFLAGS="$(CXXFLAGS) -O0 -pg --coverage" $(PIP_INSTALL) -e . --no-build-isolation $(MAKE) check - gcov -i build/*/src/pystack/_pystack -i -d - lcov --capture --directory . --output-file cppcoverage.lcov + lcov --capture --directory . --output-file cppcoverage.lcov lcov --extract cppcoverage.lcov '*/src/pystack/_pystack/*' --output-file cppcoverage.lcov genhtml *coverage.lcov --branch-coverage --output-directory pystack-coverage @@ -116,6 +115,7 @@ clean: ## Clean any built/generated artifacts find . | grep -E '(\.o|\.gcda|\.gcno|\.gcov\.json\.gz)' | xargs rm -rf find . 
| grep -E '(__pycache__|\.pyc|\.pyo)' | xargs rm -rf rm -rf build + rm -rf _skbuild rm -f src/pystack/_pystack.*.so rm -f {cpp,py}coverage.lcov rm -rf pystack-coverage diff --git a/pyproject.toml b/pyproject.toml index eb547704..2e7d2f69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,47 @@ [build-system] +requires = ["scikit-build-core>=0.5", "nanobind>=1.8"] # >=0.5: the metadata.regex version provider configured below is not available in 0.4 +build-backend = "scikit_build_core.build" -requires = [ - "setuptools", - "wheel", - "Cython", - "pkgconfig" +[project] +name = "pystack" +dynamic = ["version"] +description = "Analysis of the stack of remote python processes" +readme = "README.md" +requires-python = ">=3.8" +license = {text = "Apache-2.0"} +authors = [ + {name = "Pablo Galindo Salgado"} ] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Software Development :: Debuggers", +] + +[project.urls] +Homepage = "https://github.com/bloomberg/pystack" + +[project.scripts] +pystack = "pystack.__main__:main" -build-backend = 'setuptools.build_meta' +[tool.scikit-build] +wheel.packages = ["src/pystack"] +wheel.install-dir = "pystack" +metadata.version.provider = "scikit_build_core.metadata.regex" +metadata.version.input = "src/pystack/_version.py" +sdist.include = ["src/pystack/_version.py"] + +[tool.scikit-build.cmake.define] +CMAKE_BUILD_TYPE = "Release" [tool.ruff] line-length = 95 @@ -43,7 +77,7 @@ type = [ underlines = "-~" [tool.cibuildwheel] -build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*"] +build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", 
"cp313-*", "cp314-*"] manylinux-x86_64-image = "manylinux2014" manylinux-i686-image = "manylinux2014" musllinux-x86_64-image = "musllinux_1_2" @@ -51,7 +85,7 @@ skip = "*-musllinux_aarch64" [tool.cibuildwheel.linux] before-all = [ - "yum install -y libzstd-devel", + "yum install -y libzstd-devel cmake", "cd /", "VERS=0.193", "curl https://sourceware.org/elfutils/ftp/$VERS/elfutils-$VERS.tar.bz2 > ./elfutils.tar.bz2", @@ -74,7 +108,7 @@ before-all = [ # set the FNM_EXTMATCH macro to get the build to succeed is seen here: # https://git.alpinelinux.org/aports/tree/main/elfutils/musl-macros.patch "cd /", - "apk add --update argp-standalone bison bsd-compat-headers bzip2-dev flex-dev libtool linux-headers musl-fts-dev musl-libintl musl-obstack-dev xz-dev zlib-dev zstd-dev", + "apk add --update argp-standalone bison bsd-compat-headers bzip2-dev flex-dev libtool linux-headers musl-fts-dev musl-libintl musl-obstack-dev xz-dev zlib-dev zstd-dev cmake", "VERS=0.193", "curl https://sourceware.org/elfutils/ftp/$VERS/elfutils-$VERS.tar.bz2 > ./elfutils.tar.bz2", "tar -xf elfutils.tar.bz2", @@ -88,16 +122,12 @@ before-all = [ ] [tool.coverage.run] -plugins = [ - "Cython.Coverage", -] source = [ "src/pystack", ] branch = true parallel = true omit = [ - "stringsource", "tests/integration/*program*.py", ] diff --git a/setup.py b/setup.py deleted file mode 100644 index d76c9375..00000000 --- a/setup.py +++ /dev/null @@ -1,147 +0,0 @@ -import os -import pathlib -import sys -from sys import platform - -import pkgconfig -import setuptools -from Cython.Build import cythonize - -IS_LINUX = "linux" in platform - -if not IS_LINUX: - raise RuntimeError(f"pystack does not support this platform ({platform})") - -install_requires = [] - - -TEST_BUILD = False -if "--test-build" in sys.argv: - TEST_BUILD = True - sys.argv.remove("--test-build") - - -if os.getenv("CYTHON_TEST_MACROS", None) is not None: - TEST_BUILD = True - - -COMPILER_DIRECTIVES = { - "language_level": 3, - "embedsignature": 
True, - "boundscheck": False, - "wraparound": False, - "cdivision": True, - "c_string_type": "unicode", - "c_string_encoding": "utf8", - "freethreading_compatible": True, -} - -DEFINE_MACROS = [] - -if TEST_BUILD: - COMPILER_DIRECTIVES = { - "language_level": 3, - "boundscheck": True, - "embedsignature": True, - "wraparound": True, - "cdivision": False, - "profile": True, - "linetrace": True, - "overflowcheck": True, - "infer_types": True, - "c_string_type": "unicode", - "c_string_encoding": "utf8", - "freethreading_compatible": True, - } - DEFINE_MACROS.extend([("CYTHON_TRACE", "1"), ("CYTHON_TRACE_NOGIL", "1")]) - -library_flags = {"libraries": ["elf", "dw"]} - -try: - library_flags = pkgconfig.parse("libelf libdw") -except EnvironmentError as e: - print("pkg-config not found.", e) - print("Falling back to static flags.") -except pkgconfig.PackageNotFoundError as e: - print("Package Not Found", e) - print("Falling back to static flags.") - -if "define_macros" not in library_flags: - library_flags["define_macros"] = [] - -library_flags["define_macros"].extend(DEFINE_MACROS) - -PYSTACK_EXTENSION = setuptools.Extension( - name="pystack._pystack", - sources=[ - "src/pystack/_pystack.pyx", - "src/pystack/_pystack/corefile.cpp", - "src/pystack/_pystack/elf_common.cpp", - "src/pystack/_pystack/logging.cpp", - "src/pystack/_pystack/mem.cpp", - "src/pystack/_pystack/process.cpp", - "src/pystack/_pystack/pycode.cpp", - "src/pystack/_pystack/pyframe.cpp", - "src/pystack/_pystack/pythread.cpp", - "src/pystack/_pystack/pytypes.cpp", - "src/pystack/_pystack/unwinder.cpp", - "src/pystack/_pystack/version.cpp", - ], - language="c++", - extra_compile_args=["-std=c++17"], - extra_link_args=["-std=c++17"], - **library_flags, -) - -PYSTACK_EXTENSION.libraries.extend(["dl", "stdc++fs"]) - - -about = {} -with open("src/pystack/_version.py") as fp: - exec(fp.read(), about) - -HERE = pathlib.Path(__file__).parent.resolve() -LONG_DESCRIPTION = (HERE / 
"README.md").read_text(encoding="utf-8") - -setuptools.setup( - name="pystack", - version=about["__version__"], - python_requires=">=3.7.0", - description="Analysis of the stack of remote python processes", - long_description=LONG_DESCRIPTION, - long_description_content_type="text/markdown", - url="https://github.com/bloomberg/pystack", - author="Pablo Galindo Salgado", - classifiers=[ - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Software Development :: Debuggers", - ], - package_dir={"": "src"}, - packages=["pystack"], - ext_modules=cythonize( - [PYSTACK_EXTENSION], - include_path=["src/pystack"], - compiler_directives=COMPILER_DIRECTIVES, - ), - install_requires=install_requires, - include_package_data=False, - package_data={ - "pystack": [ - "pystack/*.pyi", - "pystack/*.typed", - ] - }, - entry_points={ - "console_scripts": ["pystack=pystack.__main__:main"], - }, -) diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index 66603def..35e518ae 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -1,45 +1,68 @@ import enum import pathlib from typing import Any +from typing import Callable from typing import Dict from typing import Iterable from typing import List from typing import Optional from typing import Tuple +from typing import TypeVar from typing import Union -from .maps import VirtualMap from .types import PyThread class CoreFileAnalyzer: - @classmethod - def __init__(cls, *args: Any, **kwargs: Any) -> None: ... 
+ def __init__( + self, + core_file: Union[str, pathlib.Path], + executable: Optional[Union[str, pathlib.Path]] = None, + lib_search_path: Optional[str] = None, + ) -> None: ... def extract_module_load_points(self) -> Dict[str, int]: ... def extract_build_ids(self) -> Iterable[Tuple[str, str, str]]: ... - def extract_executable(self) -> pathlib.Path: ... + def extract_executable(self) -> str: ... def extract_failure_info(self) -> Dict[str, Any]: ... - def extract_maps(self) -> Iterable[VirtualMap]: ... + def extract_maps(self) -> List[Dict[str, Any]]: ... def extract_pid(self) -> int: ... def extract_ps_info(self) -> Dict[str, Any]: ... def missing_modules(self) -> List[str]: ... class NativeReportingMode(enum.Enum): - ALL = ... - OFF = ... - PYTHON = ... - LAST = ... + OFF = 0 + PYTHON = 1 + ALL = 1000 + LAST = 2000 class StackMethod(enum.Enum): - ALL = 1 - ANONYMOUS_MAPS = 2 - AUTO = 3 + ELF_DATA = 1 + SYMBOLS = 2 BSS = 4 - ELF_DATA = 5 - HEAP = 6 - SYMBOLS = 7 - DEBUG_OFFSETS = 8 + ANONYMOUS_MAPS = 8 + HEAP = 16 + DEBUG_OFFSETS = 32 + AUTO = 39 # DEBUG_OFFSETS | ELF_DATA | SYMBOLS | BSS + ALL = 63 # AUTO | ANONYMOUS_MAPS | HEAP + +class ProcessManager: + pid: int + python_version: Tuple[int, int] -class ProcessManager: ... + @classmethod + def create_from_pid( + cls, pid: int, stop_process: bool = True + ) -> "ProcessManager": ... + @classmethod + def create_from_core( + cls, + core_file: Union[str, pathlib.Path], + executable: Union[str, pathlib.Path], + lib_search_path: Optional[str] = None, + ) -> "ProcessManager": ... + def interpreter_status(self) -> int: ... + def is_interpreter_active(self) -> bool: ... + def __enter__(self) -> "ProcessManager": ... + def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: ... def get_process_threads( pid: int, @@ -47,16 +70,19 @@ def get_process_threads( native_mode: NativeReportingMode = NativeReportingMode.OFF, locals: bool = False, method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: ... 
+) -> List[PyThread]: ... def get_process_threads_for_core( - core_file: pathlib.Path, - executable: pathlib.Path, + core_file: Union[str, pathlib.Path], + executable: Union[str, pathlib.Path], library_search_path: Optional[str] = None, native_mode: NativeReportingMode = NativeReportingMode.PYTHON, locals: bool = False, method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: ... -def get_bss_info(binary: Union[str, pathlib.Path]) -> Dict[str, Any]: ... -def copy_memory_from_address( - pid: int, address: int, size: int, blocking: bool = False -) -> bytes: ... +) -> List[PyThread]: ... +def get_bss_info(binary: Union[str, pathlib.Path]) -> Optional[Dict[str, Any]]: ... +def copy_memory_from_address(pid: int, address: int, size: int) -> bytes: ... +def _check_interpreter_shutdown(manager: ProcessManager) -> None: ... + +F = TypeVar("F", bound=Callable[..., Any]) + +def intercept_runtime_errors() -> Callable[[F], F]: ... diff --git a/src/pystack/_pystack.pyx b/src/pystack/_pystack.pyx deleted file mode 100644 index de16701d..00000000 --- a/src/pystack/_pystack.pyx +++ /dev/null @@ -1,782 +0,0 @@ -import contextlib -import enum -import functools -import logging -import os -import pathlib -from typing import Any -from typing import Callable -from typing import Dict -from typing import Iterable -from typing import List -from typing import Optional -from typing import Set -from typing import Tuple -from typing import TypeVar - -from cython.operator import dereference -from cython.operator import postincrement - -from _pystack.corefile cimport CoreFileExtractor -from _pystack.elf_common cimport CoreFileAnalyzer as NativeCoreFileAnalyzer -from _pystack.elf_common cimport ProcessAnalyzer as NativeProcessAnalyzer -from _pystack.elf_common cimport SectionInfo -from _pystack.elf_common cimport getSectionInfo -from _pystack.logging cimport initializePythonLoggerInterface -from _pystack.mem cimport AbstractRemoteMemoryManager -from _pystack.mem cimport 
MemoryMapInformation as CppMemoryMapInformation -from _pystack.mem cimport ProcessMemoryManager -from _pystack.mem cimport VirtualMap as CppVirtualMap -from _pystack.process cimport AbstractProcessManager -from _pystack.process cimport CoreFileProcessManager -from _pystack.process cimport InterpreterStatus -from _pystack.process cimport ProcessManager as NativeProcessManager -from _pystack.process cimport ProcessTracer -from _pystack.process cimport remote_addr_t -from _pystack.pycode cimport CodeObject -from _pystack.pyframe cimport FrameObject -from _pystack.pythread cimport NativeThread -from _pystack.pythread cimport Thread -from _pystack.pythread cimport getThreadFromInterpreterState -from cpython.unicode cimport PyUnicode_Decode -from libcpp.memory cimport make_shared -from libcpp.memory cimport make_unique -from libcpp.memory cimport shared_ptr -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string as cppstring -from libcpp.unordered_map cimport unordered_map -from libcpp.vector cimport vector - -from .errors import CoreExecutableNotFound -from .errors import EngineError -from .errors import InvalidPythonProcess -from .errors import NotEnoughInformation -from .maps import MemoryMapInformation -from .maps import VirtualMap -from .maps import generate_maps_for_process -from .maps import generate_maps_from_core_data -from .maps import parse_maps_file -from .maps import parse_maps_file_for_binary -from .process import get_python_version_for_core -from .process import get_python_version_for_process -from .process import get_thread_name -from .types import LocationInfo -from .types import NativeFrame -from .types import PyCodeObject -from .types import PyFrame -from .types import PyThread - -LOGGER = logging.getLogger(__file__) - -initializePythonLoggerInterface() - - -class StackMethod(enum.Enum): - ELF_DATA = 1 << 0 - SYMBOLS = 1 << 1 - BSS = 1 << 2 - ANONYMOUS_MAPS = 1 << 3 - HEAP = 1 << 4 - DEBUG_OFFSETS = 1 << 5 - AUTO = DEBUG_OFFSETS | 
ELF_DATA | SYMBOLS | BSS - ALL = AUTO | ANONYMOUS_MAPS | HEAP - - -class NativeReportingMode(enum.Enum): - OFF = 0 - PYTHON = 1 - ALL = 1000 - LAST = 2000 - - -cdef api void log_with_python(const cppstring *message, int level) noexcept: - pymessage = _try_to_decode_string(message) - LOGGER.log(level, pymessage) - -T = TypeVar("T", bound=Callable[..., Any]) - - -class intercept_runtime_errors: - def __init__(self, exception=EngineError): - self.exception = exception - - def __call__(self, func: T) -> T: - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: - try: - return func(*args, **kwargs) - except RuntimeError as e: - raise self.exception(*e.args) from e - - return wrapper - - -@intercept_runtime_errors(EngineError) -def copy_memory_from_address(pid, address, size): - cdef shared_ptr[AbstractRemoteMemoryManager] manager - cdef int the_pid = pid - cdef vector[int] tids - manager = ( - make_shared[ProcessMemoryManager](the_pid) - ) - - cdef AbstractRemoteMemoryManager *manager_handle = manager.get() - - memory = bytearray(size) - cdef char *buffer = memory - cdef remote_addr_t _address = address - manager_handle.copyMemoryFromProcess(_address, size, buffer) - manager.reset() - return memory - - -cdef CppVirtualMap _pymap_to_map(pymap: VirtualMap) except *: - default_path = "" - assert pymap is not None - return CppVirtualMap( - pymap.start, - pymap.end, - pymap.filesize, - pymap.flags, - pymap.offset, - pymap.device, - pymap.inode, - str(pymap.path) if pymap.path else default_path, - ) - - -cdef CppMemoryMapInformation _pymapinfo_to_mapinfo(map_info: MemoryMapInformation): - interpreter_map = ( - map_info.libpython if map_info.libpython is not None else map_info.python - ) - cdef CppMemoryMapInformation cppmap_info - assert(interpreter_map is not None) - cppmap_info.setMainMap(_pymap_to_map(interpreter_map)) - if map_info.bss: - cppmap_info.setBss(_pymap_to_map(map_info.bss)) - if map_info.heap: - 
cppmap_info.setHeap(_pymap_to_map(map_info.heap)) - - return cppmap_info - - -cdef vector[CppVirtualMap] _pymaps_to_maps(pymaps: Iterable[VirtualMap]) except *: - cdef vector[CppVirtualMap] native_maps - for pymap in pymaps: - native_maps.push_back(_pymap_to_map(pymap)) - return native_maps - - -def get_bss_info(binary): - cdef SectionInfo _result - if getSectionInfo(str(binary), b".bss", &_result): - result = _result - return result - return None - -###################### -# MANAGEMENT CLASSES # -###################### - -cdef shared_ptr[NativeCoreFileAnalyzer] get_core_analyzer( - core_file, executable=None, lib_search_path=None -) except *: - cdef shared_ptr[NativeCoreFileAnalyzer] analyzer; - cdef cppstring the_core_file, the_executable, the_lib_search_path - the_core_file = str(core_file) - if executable is not None and lib_search_path is not None: - the_executable = str(executable) - the_lib_search_path = str(lib_search_path) - analyzer = make_shared[NativeCoreFileAnalyzer]( - the_core_file, the_executable, the_lib_search_path - ) - elif executable is not None and lib_search_path is None: - the_executable = str(executable) - analyzer = make_shared[NativeCoreFileAnalyzer](the_core_file, the_executable) - else: - analyzer = make_shared[NativeCoreFileAnalyzer](the_core_file) - return analyzer - - -cdef class CoreFileAnalyzer: - cdef shared_ptr[CoreFileExtractor] _core_analyzer - cdef object ignored_libs - - def __cinit__(self, core_file, executable=None, lib_search_path=None): - self.ignored_libs = frozenset(("ld-linux", "linux-vdso")) - self._initialize_core_analyzer(core_file, executable, lib_search_path) - - @intercept_runtime_errors(EngineError) - def _initialize_core_analyzer(self, core_file, executable, lib_search_path) -> None: - cdef shared_ptr[NativeCoreFileAnalyzer] analyzer = get_core_analyzer( - core_file, executable, lib_search_path - ) - self._core_analyzer = make_shared[CoreFileExtractor](analyzer) - - @intercept_runtime_errors(EngineError) - def 
extract_maps(self) -> Iterable[VirtualMap]: - mapped_files = self._core_analyzer.get().extractMappedFiles() - memory_maps = self._core_analyzer.get().MemoryMaps() - return generate_maps_from_core_data(mapped_files, memory_maps) - - @intercept_runtime_errors(EngineError) - def extract_pid(self) -> int: - return self._core_analyzer.get().Pid() - - @intercept_runtime_errors(CoreExecutableNotFound) - def extract_executable(self) -> pathlib.Path: - return pathlib.Path(self._core_analyzer.get().extractExecutable()) - - @intercept_runtime_errors(EngineError) - def extract_failure_info(self) -> Dict[str, Any]: - return self._core_analyzer.get().extractFailureInfo() - - @intercept_runtime_errors(EngineError) - def extract_ps_info(self) -> Dict[str, Any]: - return self._core_analyzer.get().extractPSInfo() - - cdef _is_ignored_lib(self, object path): - return any(prefix in str(path) for prefix in self.ignored_libs) - - @intercept_runtime_errors(EngineError) - def missing_modules(self) -> Set[str]: - cdef set result = set() - cdef set missing_mod_names = set() - for mod in self._core_analyzer.get().missingModules(): - path = pathlib.Path(mod) - if not self._is_ignored_lib(path): - result.add(path) - missing_mod_names.add(path.name) - for memmap in self._core_analyzer.get().MemoryMaps(): - path = pathlib.Path(memmap.path) - if path.exists() or self._is_ignored_lib(path): - continue - if path.name not in missing_mod_names: - result.add(path) - return result - - @intercept_runtime_errors(EngineError) - def extract_module_load_points(self) -> Dict[str, int]: - return { - pathlib.Path(mod.filename).name: mod.start - for mod in self._core_analyzer.get().ModuleInformation() - } - - @intercept_runtime_errors(EngineError) - def extract_build_ids(self) -> Tuple[str, str, str]: - cdef object memory_maps = self._core_analyzer.get().MemoryMaps() - cdef object module_information = self._core_analyzer.get().ModuleInformation() - memory_maps_by_file = {map['path']: map['buildid'] for map in 
memory_maps} - for module in module_information: - filename = module['filename'] - if self._is_ignored_lib(filename): - continue - mod_buildid = module['buildid'] - map_buildid = memory_maps_by_file.get(filename) - yield (filename, mod_buildid, map_buildid) - -cdef class ProcessManager: - cdef shared_ptr[AbstractProcessManager] _manager - - cdef public object pid - cdef public object python_version - cdef public object virtual_maps - cdef public object map_info - - def __init__(self, pid, python_version, memory_maps, map_info): - self.pid = pid - self.python_version = python_version - self.virtual_maps = memory_maps - self.map_info = map_info - - @classmethod - def create_from_pid(cls, int pid, bint stop_process): - cdef shared_ptr[ProcessTracer] tracer - if stop_process: - tracer = make_shared[ProcessTracer](pid) - - virtual_maps = list(generate_maps_for_process(pid)) - map_info = parse_maps_file(pid, virtual_maps) - - cdef shared_ptr[NativeProcessAnalyzer] analyzer = make_shared[ - NativeProcessAnalyzer - ](pid) - cdef shared_ptr[AbstractProcessManager] native_manager = ( - make_shared[NativeProcessManager]( - pid, tracer, analyzer, - _pymaps_to_maps(virtual_maps), - _pymapinfo_to_mapinfo(map_info), - ) - ) - - native_manager.get().setPythonVersionFromDebugOffsets() - python_version = native_manager.get().findPythonVersion() - if python_version == (-1, -1): - python_version = get_python_version_for_process(pid, map_info) - native_manager.get().setPythonVersion(python_version) - - cdef ProcessManager new_manager = cls( - pid, python_version, virtual_maps, map_info - ) - new_manager._manager = native_manager - return new_manager - - @classmethod - def create_from_core( - cls, - core_file: pathlib.Path, - executable: pathlib.Path, - lib_search_path: Optional[pathlib.Path], - ): - cdef shared_ptr[NativeCoreFileAnalyzer] analyzer = get_core_analyzer( - core_file, executable, lib_search_path - ) - cdef unique_ptr[CoreFileExtractor] core_extractor = make_unique[ - 
CoreFileExtractor - ](analyzer) - - mapped_files = core_extractor.get().extractMappedFiles() - memory_maps = core_extractor.get().MemoryMaps() - load_point_by_module = { - pathlib.Path(mod.filename).name: mod.start - for mod in core_extractor.get().ModuleInformation() - } - - virtual_maps = list( - generate_maps_from_core_data(mapped_files, memory_maps) - ) - pid = core_extractor.get().Pid() - map_info = parse_maps_file_for_binary(executable, virtual_maps, load_point_by_module) - - the_core_file = str(core_file) - the_executable = str(executable) - maps = _pymaps_to_maps(virtual_maps) - native_map_info = _pymapinfo_to_mapinfo(map_info) - cdef shared_ptr[AbstractProcessManager] native_manager = ( - make_shared[CoreFileProcessManager](pid, analyzer, maps, native_map_info) - ) - - native_manager.get().setPythonVersionFromDebugOffsets() - python_version = native_manager.get().findPythonVersion() - if python_version == (-1, -1): - python_version = get_python_version_for_core(core_file, executable, map_info) - native_manager.get().setPythonVersion(python_version) - - cdef ProcessManager new_manager = cls( - pid, python_version, virtual_maps, map_info - ) - new_manager._manager = native_manager - - return new_manager - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self._manager.reset() - - cdef shared_ptr[AbstractProcessManager] get_manager(self): - assert self._manager.get() != NULL - return self._manager - - def interpreter_status(self) -> int: - return self._manager.get().isInterpreterActive() - - def is_interpreter_active(self) -> bool: - return self._manager.get().isInterpreterActive() == InterpreterStatus.RUNNING - - -###################################### -# COMMON STACK-RETRIEVING FUNCTIONS # -###################################### - -cdef object _try_to_decode_string(const cppstring *the_string): - return PyUnicode_Decode(the_string.c_str(), the_string.size(), NULL, "replace") - -cdef object 
_safe_cppmap_to_py(unordered_map[cppstring, cppstring] themap): - cdef unordered_map[cppstring, cppstring] . iterator it = themap.begin() - cdef dict result = {} - while it != themap.end(): - key = _try_to_decode_string(&(dereference(it).first)) - val = _try_to_decode_string(&(dereference(it).second)) - result[key] = val - postincrement(it) - - return result - -cdef object _construct_frame_stack_from_thread_object( - ssize_t pid, bint resolve_locals, FrameObject *first_frame -): - cdef CodeObject *current_code = NULL - cdef FrameObject *current_frame = first_frame - - last_frame = None - - while current_frame != NULL: - current_code = current_frame.Code().get() - - if not current_code: - current_frame = ( - current_frame.PreviousFrame().get() - if current_frame.PreviousFrame() - else NULL - ) - continue - - filename = current_code.Filename() - location_info = LocationInfo( - current_code.Location().lineno, - current_code.Location().end_lineno, - current_code.Location().column, - current_code.Location().end_column, - ) - py_code = PyCodeObject(filename, current_code.Scope(), location_info) - - if resolve_locals: - current_frame.resolveLocalVariables() - - args = _safe_cppmap_to_py(current_frame.Arguments()) - locals = _safe_cppmap_to_py(current_frame.Locals()) - is_entry = current_frame.IsEntryFrame() - is_shim = current_frame.IsShim() - py_frame = PyFrame(None, None, py_code, args, locals, is_entry, is_shim) - - py_frame.next = last_frame - if last_frame: - last_frame.prev = py_frame - - last_frame = py_frame - current_frame = ( - current_frame.PreviousFrame().get() - if current_frame.PreviousFrame() - else NULL - ) - - return last_frame - -cdef object _construct_threads_from_interpreter_state( - shared_ptr[AbstractProcessManager] manager, - remote_addr_t head, - int pid, - object python_version, - bint add_native_traces, - bint resolve_locals, -): - LOGGER.info("Fetching Python threads") - threads = [] - - cdef shared_ptr[Thread] thread = 
getThreadFromInterpreterState(manager, head) - cdef Thread *current_thread = thread.get() - while current_thread != NULL: - LOGGER.info("Constructing new Python thread with tid %s", current_thread.Tid()) - if add_native_traces: - current_thread.populateNativeStackTrace(manager) - frame = _construct_frame_stack_from_thread_object( - pid, resolve_locals, current_thread.FirstFrame().get() - ) - native_frames = [ - NativeFrame(**native_frame) - for native_frame in list(current_thread.NativeFrames()) - ] - threads.append( - PyThread( - current_thread.Tid(), - frame, - native_frames[::-1], - current_thread.isGilHolder(), - current_thread.isGCCollecting(), - python_version, - name=get_thread_name(pid, current_thread.Tid()), - ) - ) - current_thread = ( - current_thread.NextThread().get() if current_thread.NextThread() else NULL - ) - - return threads - -cdef object _construct_os_thread( - shared_ptr[AbstractProcessManager] manager, int pid, int tid -): - cdef unique_ptr[NativeThread] thread = make_unique[NativeThread](pid, tid) - thread.get().populateNativeStackTrace(manager) - native_frames = [ - NativeFrame(**native_frame) - for native_frame in list(thread.get().NativeFrames()) - ] - LOGGER.info("Constructing new native thread with tid %s", tid) - pythread = PyThread( - tid, - None, - native_frames[::-1], - False, - False, - None, - name=get_thread_name(pid, tid), - ) - - return pythread - -cdef object _construct_os_threads( - shared_ptr[AbstractProcessManager] manager, int pid, object tids -): - LOGGER.info("Fetching native threads") - threads = [] - for tid in tids: - threads.append(_construct_os_thread(manager, pid, tid)) - - return threads - -cdef remote_addr_t _get_interpreter_state_addr( - AbstractProcessManager *manager, object method, int core=False -) except*: - cdef remote_addr_t head = 0 - possible_methods = [ - StackMethod.DEBUG_OFFSETS, - StackMethod.ELF_DATA, - StackMethod.SYMBOLS, - StackMethod.BSS, - StackMethod.ANONYMOUS_MAPS, - StackMethod.HEAP, - ] - 
- for possible_method in possible_methods: - if method.value & possible_method.value == 0: - continue - - try: - if possible_method == StackMethod.DEBUG_OFFSETS: - how = "using debug offsets data" - head = manager.findInterpreterStateFromDebugOffsets() - elif possible_method == StackMethod.ELF_DATA: - how = "using ELF data" - head = manager.findInterpreterStateFromElfData() - elif possible_method == StackMethod.SYMBOLS: - how = "using symbols" - head = manager.findInterpreterStateFromSymbols() - elif possible_method == StackMethod.BSS: - how = "scanning the BSS" - head = manager.scanBSS() - elif possible_method == StackMethod.ANONYMOUS_MAPS: - how = "scanning all anonymous maps" - head = manager.scanAllAnonymousMaps() - elif possible_method == StackMethod.HEAP: - how = "scanning the heap" - head = manager.scanHeap() - except Exception as exc: - LOGGER.warning( - "Unexpected error finding PyInterpreterState by %s: %s", how, exc - ) - - if head: - LOGGER.info("PyInterpreterState found by %s at address 0x%0.2X", how, head) - return head - else: - LOGGER.info("Address of PyInterpreterState not found by %s", how) - - LOGGER.info("Address of PyInterpreterState could not be found") - return 0 - - -def _check_interpreter_shutdown(manager): - status = manager.interpreter_status() - if status == InterpreterStatus.UNKNOWN: - return - if status == InterpreterStatus.FINALIZED: - msg = ( - "The interpreter is shutting itself down so it is possible that no Python" - " stack trace is available for inspection. You can still use --native-all " - " to force displaying all the threads." 
- ) - LOGGER.warning(msg) - else: - LOGGER.info("An active interpreter has been detected") - - -##################### -# PROCESS FUNCTIONS # -##################### - - -def _get_process_threads( - pymanager: ProcessManager, - pid: int, - native_mode: NativeReportingMode, - resolve_locals: bool, - method: StackMethod, -): - LOGGER.debug("Available memory maps for process:") - for mem_map in pymanager.virtual_maps: - LOGGER.debug(mem_map) - - cdef shared_ptr[AbstractProcessManager] manager = pymanager.get_manager() - - if native_mode != NativeReportingMode.ALL: - _check_interpreter_shutdown(pymanager) - - cdef remote_addr_t head = _get_interpreter_state_addr(manager.get(), method) - - if not head and native_mode != NativeReportingMode.ALL: - raise NotEnoughInformation( - "Could not gather enough information to extract the Python frame information" - ) - - all_tids = list(manager.get().Tids()) - if head: - add_native_traces = native_mode != NativeReportingMode.OFF - for thread in _construct_threads_from_interpreter_state( - manager, - head, - pid, - pymanager.python_version, - add_native_traces, - resolve_locals, - ): - if thread.tid in all_tids: - all_tids.remove(thread.tid) - yield thread - - if native_mode == NativeReportingMode.ALL: - yield from _construct_os_threads(manager, pid, all_tids) - - -def get_process_threads( - pid: int, - stop_process: bool = True, - native_mode: NativeReportingMode = NativeReportingMode.OFF, - locals: bool = False, - method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: - """Return an iterable of Thread objects that are registered with the remote interpreter - - Args: - pid (int): The pid of the remote process - stop_process (bool): If *True*, stop the process for analysis and use - blocking APis to obtain remote information. - native_mode (NativeReportingMode): If set to PYTHON, include the - native (C/C++) stack in the returned Thread objects for all threads - registered with the interpreter. 
If set to ALL, native stacks - from threads not registered with the interpreter will be provided - as well. By default this is set to OFF and native stacks are not - returned. - locals (bool): If **True**, retrieve the local variables and arguments for - every retrieved frame (may slow down the processing). - method (StackMethod): The method to locate the relevant Python structs - that are needed to unwind the Python stack. - - Returns: - Iterable of Thread objects. - """ - if not isinstance(method, StackMethod): - raise ValueError("Invalid method for stack analysis") - - LOGGER.info( - "Analyzing process with pid %s using stack method %s with native mode %s", - pid, - method, - native_mode, - ) - - try: - with ProcessManager.create_from_pid(pid, stop_process) as manager: - yield from _get_process_threads(manager, pid, native_mode, locals, method) - except RuntimeError as e: - raise EngineError(*e.args, pid=pid) from e - - -###################### -# COREFILE FUNCTIONS # -###################### - - -def get_process_threads_for_core( - core_file: pathlib.Path, - executable: pathlib.Path, - library_search_path: str = None, - native_mode: NativeReportingMode = NativeReportingMode.PYTHON, - locals: bool = False, - method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: - """Return an iterable of Thread objects that are registered with the given core file - - Args: - core_file (pathlib.Path): The location of the core file to analyze. - executable (pathlib.Path): The location of the executable that the core file - was created from. - library_search_path (str): A ":"-separated list of directories to use when - trying to locate missing shared libraries in the core file. - native_mode (NativeReportingMode): If set to PYTHON, include the - native (C/C++) stack in the returned Thread objects for all threads - registered with the interpreter. If set to ALL, native stacks - from threads not registered with the interpreter will be provided - as well. 
By default this is set to OFF and native stacks are not - returned. - locals (bool): If **True**, retrieve the local variables and arguments for - every retrieved frame (may slow down the processing). - method (StackMethod): The method to locate the relevant Python structs - that are needed to unwind the Python stack. - - Returns: - Iterable of Thread objects. - """ - if not isinstance(method, StackMethod): - raise ValueError("Invalid method for stack analysis") - - LOGGER.info( - "Analyzing core file %s with executable %s using stack method %s with native mode %s", - core_file, - executable, - method, - native_mode, - ) - try: - yield from _get_process_threads_for_core( - core_file, executable, library_search_path, native_mode, locals, method - ) - except RuntimeError as e: - raise EngineError(*e.args, corefile=core_file) from e - - -def _get_process_threads_for_core( - corefile: pathlib.Path, - executable: pathlib.Path, - library_search_path: str = None, - native_mode: NativeReportingMode = NativeReportingMode.PYTHON, - locals: bool = False, - method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: - cdef ProcessManager pymanager = ProcessManager.create_from_core( - corefile, executable, library_search_path - ) - - LOGGER.debug("Available memory maps for core:") - for mem_map in pymanager.virtual_maps: - LOGGER.debug(mem_map) - - cdef shared_ptr[AbstractProcessManager] manager = pymanager.get_manager() - - if native_mode != NativeReportingMode.ALL: - _check_interpreter_shutdown(pymanager) - - cdef remote_addr_t head = _get_interpreter_state_addr( - manager.get(), method, core=True - ) - - if not head and native_mode != NativeReportingMode.ALL: - raise NotEnoughInformation( - "Could not gather enough information to extract the Python frame information" - ) - - all_tids = list(manager.get().Tids()) - - if head: - native = native_mode in {NativeReportingMode.PYTHON, NativeReportingMode.ALL} - for thread in _construct_threads_from_interpreter_state( - 
manager, head, pymanager.pid, pymanager.python_version, native, locals - ): - if thread.tid in all_tids: - all_tids.remove(thread.tid) - yield thread - - if native_mode == NativeReportingMode.ALL: - yield from _construct_os_threads(manager, pymanager.pid, all_tids) diff --git a/src/pystack/_pystack/CMakeLists.txt b/src/pystack/_pystack/CMakeLists.txt index 5a0fd8a8..3621e1fa 100644 --- a/src/pystack/_pystack/CMakeLists.txt +++ b/src/pystack/_pystack/CMakeLists.txt @@ -1,26 +1,69 @@ -cmake_minimum_required(VERSION 2.8) -project(_pystack) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -find_package(PythonInterp 3.7 REQUIRED) -find_package(PythonLibs 3.7 REQUIRED) -IF(NOT PYTHONLIBS_FOUND OR NOT PYTHON_EXECUTABLE) - MESSAGE(SEND_ERROR "You need Python to build Python binding") -ENDIF(NOT PYTHONLIBS_FOUND OR NOT PYTHON_EXECUTABLE) - -add_library(_pystack STATIC - corefile.cpp - unwinder.cpp - logging.cpp - mem.cpp - process.cpp - pycode.cpp - pyframe.cpp - pythread.cpp - version.cpp - elf_common.cpp - pytypes.cpp) -set_property(TARGET _pystack PROPERTY POSITION_INDEPENDENT_CODE ON) -include_directories("." "cpython" ${PYTHON_INCLUDE_DIRS}) +# PyStack C++ extension module via nanobind + +# Find pthreads +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +# Collect all C++ source files +set(PYSTACK_SOURCES + corefile.cpp + elf_common.cpp + logging.cpp + maps_parser.cpp + mem.cpp + process.cpp + pycode.cpp + pyframe.cpp + pythread.cpp + pytypes.cpp + thread_builder.cpp + unwinder.cpp + version.cpp + version_detector.cpp + bindings.cpp +) + +# Create the nanobind module +nanobind_add_module( + _pystack + STABLE_ABI + NB_STATIC + ${PYSTACK_SOURCES} +) + +# Include directories +# Note: We only include the source directory, not cpython/ directly. +# The cpython headers are included with "cpython/..." 
prefix to avoid +# conflicts with system headers (e.g., cpython/pthread.h vs /usr/include/pthread.h) +target_include_directories(_pystack PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${LIBELF_INCLUDE_DIRS} + ${LIBDW_INCLUDE_DIRS} +) + +# Link libraries +target_link_libraries(_pystack PRIVATE + ${LIBELF_LIBRARIES} + ${LIBDW_LIBRARIES} + Threads::Threads + dl + stdc++fs +) + +# Compiler definitions +target_compile_definitions(_pystack PRIVATE + ${LIBELF_CFLAGS_OTHER} + ${LIBDW_CFLAGS_OTHER} +) + +# Add pthread compile options (needed for C++ threading support on Linux) +target_compile_options(_pystack PRIVATE -pthread) + +# Link directories +target_link_directories(_pystack PRIVATE + ${LIBELF_LIBRARY_DIRS} + ${LIBDW_LIBRARY_DIRS} +) + +# Install the module (destination is relative to wheel.install-dir in pyproject.toml) +install(TARGETS _pystack LIBRARY DESTINATION .) diff --git a/src/pystack/_pystack/__init__.pxd b/src/pystack/_pystack/__init__.pxd deleted file mode 100644 index e69de29b..00000000 diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp new file mode 100644 index 00000000..14a6ba86 --- /dev/null +++ b/src/pystack/_pystack/bindings.cpp @@ -0,0 +1,791 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "corefile.h" +#include "elf_common.h" +#include "logging.h" +#include "maps_parser.h" +#include "mem.h" +#include "process.h" +#include "thread_builder.h" + +namespace nb = nanobind; +using namespace nb::literals; +// Note: We don't use "using namespace pystack;" because it conflicts with Python's PyObject + +// Simple exception classes that store the message +class NotEnoughInformationError : public std::exception +{ + public: + explicit NotEnoughInformationError(const std::string& message) + : d_message(message) + { + } + const char* what() const noexcept override + { + return d_message.c_str(); + } + + private: + std::string d_message; 
+}; + +class EngineError : public std::exception +{ + public: + explicit EngineError(const std::string& message) + : d_message(message) + { + } + const char* what() const noexcept override + { + return d_message.c_str(); + } + + private: + std::string d_message; +}; + +[[noreturn]] void +raise_not_enough_information(const char* message) +{ + throw NotEnoughInformationError(message); +} + +// StackMethod enum values (must match Python enum) +enum class StackMethod { + ELF_DATA = 1 << 0, + SYMBOLS = 1 << 1, + BSS = 1 << 2, + ANONYMOUS_MAPS = 1 << 3, + HEAP = 1 << 4, + DEBUG_OFFSETS = 1 << 5, + AUTO = (1 << 5) | (1 << 0) | (1 << 1) | (1 << 2), + ALL = (1 << 5) | (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4), +}; + +enum class NativeReportingMode { + OFF = 0, + PYTHON = 1, + ALL = 1000, + LAST = 2000, +}; + +class CoreFileAnalyzerWrapper +{ + public: + CoreFileAnalyzerWrapper( + const std::filesystem::path& corefile, + std::optional executable = std::nullopt, + std::optional lib_search_path = std::nullopt) + : d_ignored_libs({"ld-linux", "linux-vdso"}) + { + std::string corefile_str = corefile.string(); + if (executable && lib_search_path) { + d_analyzer = std::make_shared( + corefile_str, + executable->string(), + lib_search_path->string()); + } else if (executable) { + d_analyzer = std::make_shared(corefile_str, executable->string()); + } else { + d_analyzer = std::make_shared(corefile_str); + } + d_extractor = std::make_unique(d_analyzer); + } + + nb::list extract_maps() + { + auto mapped_files = d_extractor->extractMappedFiles(); + auto memory_maps = d_extractor->MemoryMaps(); + auto maps = parseCoreFileMaps(mapped_files, memory_maps); + + nb::module_ pystack_maps = nb::module_::import_("pystack.maps"); + nb::object VirtualMap = pystack_maps.attr("VirtualMap"); + + nb::list result; + for (const auto& map : maps) { + std::string path_str = map.Path(); + nb::object path_obj = + path_str.empty() ? 
nb::none() : nb::cast(std::filesystem::path(path_str)); + nb::object vm = VirtualMap( + map.Start(), + map.End(), + map.FileSize(), + map.Offset(), + map.Device(), + map.Flags(), + map.Inode(), + path_obj); + result.append(vm); + } + return result; + } + + int extract_pid() + { + return d_extractor->Pid(); + } + + std::filesystem::path extract_executable() + { + return std::filesystem::path(d_extractor->extractExecutable()); + } + + nb::dict extract_failure_info() + { + auto info = d_extractor->extractFailureInfo(); + nb::dict result; + result["si_signo"] = info.si_signo; + result["si_errno"] = info.si_errno; + result["si_code"] = info.si_code; + result["sender_pid"] = info.sender_pid; + result["sender_uid"] = info.sender_uid; + result["failed_addr"] = info.failed_addr; + return result; + } + + nb::dict extract_ps_info() + { + auto info = d_extractor->extractPSInfo(); + nb::dict result; + result["state"] = static_cast(info.state); + result["sname"] = static_cast(info.sname); + result["zomb"] = static_cast(info.zomb); + result["nice"] = static_cast(info.nice); + result["flag"] = info.flag; + result["uid"] = info.uid; + result["gid"] = info.gid; + result["pid"] = info.pid; + result["ppid"] = info.ppid; + result["pgrp"] = info.pgrp; + result["sid"] = info.sid; + result["fname"] = std::string(info.fname); + result["psargs"] = std::string(info.psargs); + return result; + } + + std::vector missing_modules() + { + std::vector result; + for (const auto& mod : d_extractor->missingModules()) { + if (!isIgnoredLib(mod)) { + result.push_back(mod); + } + } + for (const auto& memmap : d_extractor->MemoryMaps()) { + std::string path = memmap.path; + if (path.empty() || isIgnoredLib(path)) { + continue; + } + // Check if path exists + std::ifstream f(path); + if (f.good()) { + continue; + } + // Check if already in result + auto fname = std::filesystem::path(path).filename().string(); + bool found = false; + for (const auto& r : result) { + if 
(std::filesystem::path(r).filename().string() == fname) { + found = true; + break; + } + } + if (!found) { + result.push_back(path); + } + } + return result; + } + + nb::dict extract_module_load_points() + { + nb::dict result; + for (const auto& mod : d_extractor->ModuleInformation()) { + auto name = std::filesystem::path(mod.filename).filename().string(); + result[nb::cast(name)] = mod.start; + } + return result; + } + + nb::list extract_build_ids() + { + nb::list result; + auto memory_maps = d_extractor->MemoryMaps(); + auto module_info = d_extractor->ModuleInformation(); + + std::unordered_map maps_by_file; + for (const auto& map : memory_maps) { + maps_by_file[map.path] = map.buildid; + } + + for (const auto& mod : module_info) { + if (isIgnoredLib(mod.filename)) { + continue; + } + auto map_buildid_it = maps_by_file.find(mod.filename); + std::string map_buildid = + (map_buildid_it != maps_by_file.end()) ? map_buildid_it->second : ""; + result.append(nb::make_tuple(mod.filename, mod.buildid, map_buildid)); + } + return result; + } + + private: + bool isIgnoredLib(const std::string& path) + { + for (const auto& prefix : d_ignored_libs) { + if (path.find(prefix) != std::string::npos) { + return true; + } + } + return false; + } + + std::shared_ptr d_analyzer; + std::unique_ptr d_extractor; + std::vector d_ignored_libs; +}; + +class ProcessManagerWrapper +{ + public: + explicit ProcessManagerWrapper(std::shared_ptr manager) + : d_manager(std::move(manager)) + { + } + + static std::unique_ptr create_from_pid(pid_t pid, bool stop_process) + { + auto manager = pystack::ProcessManager::create(pid, stop_process); + return std::make_unique(std::move(manager)); + } + + static std::unique_ptr create_from_core( + const std::filesystem::path& core_file, + const std::filesystem::path& executable, + std::optional lib_search_path) + { + std::optional lib_path_str; + if (lib_search_path) { + lib_path_str = lib_search_path->string(); + } + auto manager = 
pystack::CoreFileProcessManager::create( + core_file.string(), + executable.string(), + lib_path_str); + return std::make_unique(std::move(manager)); + } + + int interpreter_status() + { + return static_cast(d_manager->isInterpreterActive()); + } + + bool is_interpreter_active() + { + return d_manager->isInterpreterActive() + == pystack::AbstractProcessManager::InterpreterStatus::RUNNING; + } + + void reset() + { + d_manager.reset(); + } + + pid_t pid() const + { + return d_manager->Pid(); + } + + std::pair python_version() const + { + return d_manager->Version(); + } + + const std::vector& virtual_maps() const + { + return d_manager->MemoryMaps(); + } + + std::shared_ptr get_manager() + { + return d_manager; + } + + private: + std::shared_ptr d_manager; +}; + +nb::bytes +copy_memory_from_address(pid_t pid, uintptr_t address, size_t size) +{ + auto manager = std::make_shared(pid); + std::vector buffer(size); + manager->copyMemoryFromProcess(address, size, buffer.data()); + return nb::bytes(buffer.data(), buffer.size()); +} + +nb::object +get_bss_info(const std::filesystem::path& binary) +{ + pystack::SectionInfo info; + if (pystack::getSectionInfo(binary.string(), ".bss", &info)) { + nb::dict result; + result["name"] = info.name; + result["flags"] = info.flags; + result["addr"] = info.addr; + result["corrected_addr"] = info.corrected_addr; + result["offset"] = info.offset; + result["size"] = info.size; + return result; + } + return nb::none(); +} + +// Helper struct to hold Python type objects for thread building +struct PyTypes +{ + nb::object PyThread; + nb::object PyFrame; + nb::object PyCodeObject; + nb::object LocationInfo; + nb::object NativeFrame; + + static PyTypes load() + { + nb::module_ pystack_types = nb::module_::import_("pystack.types"); + return {pystack_types.attr("PyThread"), + pystack_types.attr("PyFrame"), + pystack_types.attr("PyCodeObject"), + pystack_types.attr("LocationInfo"), + pystack_types.attr("NativeFrame")}; + } +}; + +// Build frame 
chain from C++ thread data +nb::object +buildFrameChain(const pystack::PyThreadData& thread, const PyTypes& types) +{ + nb::object first_frame = nb::none(); + nb::object prev_frame = nb::none(); + + // Frames from C++ are in innermost-to-outermost order + // Python iterates via .next and expects: -> first_func -> second_func -> third_func + // So we iterate in reverse to build the list in the correct order + for (auto it = thread.frames.rbegin(); it != thread.frames.rend(); ++it) { + const auto& frame_data = *it; + nb::object location = types.LocationInfo( + frame_data.code.location.lineno, + frame_data.code.location.end_lineno, + frame_data.code.location.column, + frame_data.code.location.end_column); + nb::object code = types.PyCodeObject(frame_data.code.filename, frame_data.code.scope, location); + + nb::dict args; + for (const auto& [k, v] : frame_data.arguments) { + args[nb::cast(k)] = v; + } + nb::dict locs; + for (const auto& [k, v] : frame_data.locals) { + locs[nb::cast(k)] = v; + } + + nb::object py_frame = types.PyFrame( + prev_frame, + nb::none(), + code, + args, + locs, + frame_data.is_entry, + frame_data.is_shim); + + if (!prev_frame.is_none()) { + prev_frame.attr("next") = py_frame; + } + + if (first_frame.is_none()) { + first_frame = py_frame; + } + prev_frame = py_frame; + } + + return first_frame; +} + +// Build native frames list +nb::list +buildNativeFramesList(const std::vector& native_frames, const PyTypes& types) +{ + nb::list result; + for (const auto& nf : native_frames) { + result.append(types.NativeFrame( + nf.address, + nf.symbol, + nf.path, + nf.linenumber, + nf.colnumber, + nf.library)); + } + return result; +} + +// Build a Python thread object from C++ thread data +nb::object +buildPyThreadObject( + const pystack::PyThreadData& thread, + const PyTypes& types, + std::pair python_version) +{ + nb::object first_frame = buildFrameChain(thread, types); + nb::list native_frames = buildNativeFramesList(thread.native_frames, types); + + 
return types.PyThread( + thread.tid, + first_frame, + native_frames, + thread.gil_status, + thread.gc_status, + nb::make_tuple(python_version.first, python_version.second), + "name"_a = thread.name ? nb::cast(*thread.name) : nb::none()); +} + +// Build a native-only thread object (no Python frames) +nb::object +buildNativeOnlyThreadObject(const pystack::PyThreadData& thread, const PyTypes& types) +{ + nb::list native_frames = buildNativeFramesList(thread.native_frames, types); + + return types.PyThread( + thread.tid, + nb::none(), + native_frames, + 0, + 0, + nb::none(), + "name"_a = thread.name ? nb::cast(*thread.name) : nb::none()); +} + +// Log interpreter status +void +logInterpreterStatus(int status) +{ + if (status == static_cast(pystack::AbstractProcessManager::InterpreterStatus::FINALIZED)) { + pystack::LOG(pystack::WARNING) + << "The interpreter is shutting itself down so it is possible that no " + "Python stack trace is available for inspection."; + } else if (status == static_cast(pystack::AbstractProcessManager::InterpreterStatus::RUNNING)) { + pystack::LOG(pystack::INFO) << "An active interpreter has been detected"; + } +} + +// Log available memory maps +void +logMemoryMaps(const std::vector& maps, const char* source) +{ + pystack::LOG(pystack::DEBUG) << "Available memory maps for " << source << ":"; + for (const auto& map : maps) { + pystack::LOG(pystack::DEBUG) + << " " << std::hex << map.Start() << "-" << map.End() << " " << map.Path(); + } +} + +nb::object +get_process_threads( + pid_t pid, + bool stop_process, + NativeReportingMode native_mode, + bool locals, + StackMethod method) +{ + auto types = PyTypes::load(); + + try { + auto manager = ProcessManagerWrapper::create_from_pid(pid, stop_process); + logMemoryMaps(manager->virtual_maps(), "process"); + + if (native_mode != NativeReportingMode::ALL) { + logInterpreterStatus(manager->interpreter_status()); + } + + pystack::remote_addr_t head = + 
pystack::getInterpreterStateAddr(manager->get_manager().get(), static_cast(method)); + + if (head == 0 && native_mode != NativeReportingMode::ALL) { + raise_not_enough_information( + "Could not gather enough information to extract the Python frame information"); + } + + nb::list result; + std::vector all_tids = pystack::getThreadIds(manager->get_manager()); + + if (head != 0) { + bool add_native = native_mode != NativeReportingMode::OFF; + auto threads = pystack::buildThreadsFromInterpreter( + manager->get_manager(), + head, + pid, + add_native, + locals); + + for (const auto& thread : threads) { + result.append(buildPyThreadObject(thread, types, manager->python_version())); + all_tids.erase( + std::remove(all_tids.begin(), all_tids.end(), thread.tid), + all_tids.end()); + } + } + + if (native_mode == NativeReportingMode::ALL) { + for (int tid : all_tids) { + auto thread = pystack::buildNativeThread(manager->get_manager(), pid, tid); + result.append(buildNativeOnlyThreadObject(thread, types)); + } + } + + manager->reset(); + return result; + } catch (const NotEnoughInformationError&) { + throw; + } catch (const EngineError&) { + throw; + } catch (const std::exception& e) { + throw EngineError(e.what()); + } +} + +nb::object +get_process_threads_for_core( + const std::filesystem::path& core_file, + const std::filesystem::path& executable, + std::optional library_search_path, + NativeReportingMode native_mode, + bool locals, + StackMethod method) +{ + auto types = PyTypes::load(); + + try { + auto manager = + ProcessManagerWrapper::create_from_core(core_file, executable, library_search_path); + logMemoryMaps(manager->virtual_maps(), "core"); + + if (native_mode != NativeReportingMode::ALL) { + logInterpreterStatus(manager->interpreter_status()); + } + + pystack::remote_addr_t head = + pystack::getInterpreterStateAddr(manager->get_manager().get(), static_cast(method)); + + if (head == 0 && native_mode != NativeReportingMode::ALL) { + raise_not_enough_information( + 
"Could not gather enough information to extract the Python frame information"); + } + + nb::list result; + std::vector all_tids = pystack::getThreadIds(manager->get_manager()); + + if (head != 0) { + bool add_native = native_mode == NativeReportingMode::PYTHON + || native_mode == NativeReportingMode::ALL; + auto threads = pystack::buildThreadsFromInterpreter( + manager->get_manager(), + head, + manager->pid(), + add_native, + locals); + + for (const auto& thread : threads) { + result.append(buildPyThreadObject(thread, types, manager->python_version())); + all_tids.erase( + std::remove(all_tids.begin(), all_tids.end(), thread.tid), + all_tids.end()); + } + } + + if (native_mode == NativeReportingMode::ALL) { + for (int tid : all_tids) { + auto thread = pystack::buildNativeThread(manager->get_manager(), manager->pid(), tid); + result.append(buildNativeOnlyThreadObject(thread, types)); + } + } + + return result; + } catch (const NotEnoughInformationError&) { + throw; + } catch (const EngineError&) { + throw; + } catch (const std::exception& e) { + throw EngineError(e.what()); + } +} + +void +_check_interpreter_shutdown(nb::object manager) +{ + int status = nb::cast(manager.attr("interpreter_status")()); + + if (status == static_cast(pystack::AbstractProcessManager::InterpreterStatus::FINALIZED)) { + pystack::LOG(pystack::WARNING) + << "The interpreter is shutting itself down so it is possible that no " + "Python stack trace is available for inspection."; + } else if (status != -1) { + // -1 means failed to detect, 2 means FINALIZED (already handled above) + // Other values mean running/active + pystack::LOG(pystack::INFO) << "An active interpreter has been detected"; + } +} + +NB_MODULE(_pystack, m) +{ + m.doc() = "PyStack native extension module"; + + nb::register_exception_translator([](const std::exception_ptr& p, void*) { + try { + if (p) std::rethrow_exception(p); + } catch (const NotEnoughInformationError& e) { + nb::object exc_type = 
nb::module_::import_("pystack.errors").attr("NotEnoughInformation"); + PyErr_SetString(exc_type.ptr(), e.what()); + } catch (const EngineError& e) { + nb::object exc_type = nb::module_::import_("pystack.errors").attr("EngineError"); + PyErr_SetString(exc_type.ptr(), e.what()); + } + }); + + pystack::initializePythonLoggerInterface(); + + nb::enum_<StackMethod>(m, "StackMethod", nb::is_flag()) + .value("ELF_DATA", StackMethod::ELF_DATA) + .value("SYMBOLS", StackMethod::SYMBOLS) + .value("BSS", StackMethod::BSS) + .value("ANONYMOUS_MAPS", StackMethod::ANONYMOUS_MAPS) + .value("HEAP", StackMethod::HEAP) + .value("DEBUG_OFFSETS", StackMethod::DEBUG_OFFSETS) + .value("AUTO", StackMethod::AUTO) + .value("ALL", StackMethod::ALL); + + nb::enum_<NativeReportingMode>(m, "NativeReportingMode") + .value("OFF", NativeReportingMode::OFF) + .value("PYTHON", NativeReportingMode::PYTHON) + .value("ALL", NativeReportingMode::ALL) + .value("LAST", NativeReportingMode::LAST); + + nb::class_<CoreFileAnalyzerWrapper>(m, "CoreFileAnalyzer") + .def(nb::init< + const std::filesystem::path&, + std::optional<std::filesystem::path>, + std::optional<std::filesystem::path>>(), + "core_file"_a, + "executable"_a = nb::none(), + "lib_search_path"_a = nb::none()) + .def("extract_maps", &CoreFileAnalyzerWrapper::extract_maps) + .def("extract_pid", &CoreFileAnalyzerWrapper::extract_pid) + .def("extract_executable", &CoreFileAnalyzerWrapper::extract_executable) + .def("extract_failure_info", &CoreFileAnalyzerWrapper::extract_failure_info) + .def("extract_ps_info", &CoreFileAnalyzerWrapper::extract_ps_info) + .def("missing_modules", &CoreFileAnalyzerWrapper::missing_modules) + .def("extract_module_load_points", &CoreFileAnalyzerWrapper::extract_module_load_points) + .def("extract_build_ids", &CoreFileAnalyzerWrapper::extract_build_ids); + + nb::class_<ProcessManagerWrapper>(m, "ProcessManager") + .def_static( + "create_from_pid", + &ProcessManagerWrapper::create_from_pid, + "pid"_a, + "stop_process"_a = true) + .def_static( + "create_from_core", + &ProcessManagerWrapper::create_from_core, + "core_file"_a, + "executable"_a, +
"lib_search_path"_a = nb::none()) + .def("interpreter_status", &ProcessManagerWrapper::interpreter_status) + .def("is_interpreter_active", &ProcessManagerWrapper::is_interpreter_active) + .def_prop_ro("pid", &ProcessManagerWrapper::pid) + .def_prop_ro("python_version", &ProcessManagerWrapper::python_version) + .def( + "__enter__", + [](ProcessManagerWrapper& self) -> ProcessManagerWrapper& { return self; }, + nb::rv_policy::reference) + .def("__exit__", [](ProcessManagerWrapper& self, nb::args) { self.reset(); }); + + m.def("copy_memory_from_address", + &copy_memory_from_address, + "pid"_a, + "address"_a, + "size"_a, + "Copy memory from a remote process"); + + m.def("get_bss_info", &get_bss_info, "binary"_a, "Get BSS section information from an ELF binary"); + + // Note: We use nb::arg().none() to allow None to be passed explicitly + m.def( + "get_process_threads", + [](pid_t pid, + bool stop_process, + NativeReportingMode native_mode, + bool locals, + nb::object method_obj) { + if (method_obj.is_none()) { + throw std::invalid_argument("Invalid method for stack analysis"); + } + StackMethod method = nb::cast<StackMethod>(method_obj); + return get_process_threads(pid, stop_process, native_mode, locals, method); + }, + "pid"_a, + "stop_process"_a = true, + "native_mode"_a = NativeReportingMode::OFF, + "locals"_a = false, + nb::arg("method").none() = nb::cast(StackMethod::AUTO), + "Return an iterable of Thread objects from a live process"); + + m.def( + "get_process_threads_for_core", + [](const std::filesystem::path& core_file, + const std::filesystem::path& executable, + std::optional<std::filesystem::path> library_search_path, + NativeReportingMode native_mode, + bool locals, + nb::object method_obj) { + if (method_obj.is_none()) { + throw std::invalid_argument("Invalid method for stack analysis"); + } + StackMethod method = nb::cast<StackMethod>(method_obj); + return get_process_threads_for_core( + core_file, + executable, + library_search_path, + native_mode, + locals, + method); + }, + "core_file"_a, +
"executable"_a, + "library_search_path"_a = nb::none(), + "native_mode"_a = NativeReportingMode::PYTHON, + "locals"_a = false, + nb::arg("method").none() = nb::cast(StackMethod::AUTO), + "Return an iterable of Thread objects from a core file"); + + m.def("_check_interpreter_shutdown", + &_check_interpreter_shutdown, + "manager"_a, + "Check interpreter shutdown status and log appropriately"); + + // intercept_runtime_errors decorator - re-export from pystack.errors + nb::module_ pystack_errors = nb::module_::import_("pystack.errors"); + m.attr("intercept_runtime_errors") = pystack_errors.attr("intercept_runtime_errors"); +} diff --git a/src/pystack/_pystack/corefile.pxd b/src/pystack/_pystack/corefile.pxd deleted file mode 100644 index 629f77af..00000000 --- a/src/pystack/_pystack/corefile.pxd +++ /dev/null @@ -1,55 +0,0 @@ -from posix.types cimport pid_t - -from _pystack.elf_common cimport CoreFileAnalyzer -from _pystack.mem cimport SimpleVirtualMap -from libc.stdint cimport uintptr_t -from libcpp.memory cimport shared_ptr -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "corefile.h" namespace "pystack": - struct CoreCrashInfo: - int si_signo - int si_errno - int si_code - int sender_pid - int sender_uid - uintptr_t failed_addr - - struct CorePsInfo: - char state - char sname - char zomb - char nice - unsigned long flag - int uid - int gid - pid_t pid - pid_t ppid - pid_t pgrp - pid_t sid - char fname[16] - char psargs[80] - - struct CoreVirtualMap: - uintptr_t start - uintptr_t end - unsigned long filesize - cppstring flags - unsigned long offset - cppstring device - unsigned long inode - cppstring path - cppstring buildid - - cdef cppclass CoreFileExtractor: - CoreFileExtractor(shared_ptr[CoreFileAnalyzer] analyzer) except+ - int Pid() except+ - vector[CoreVirtualMap] MemoryMaps() except+ - vector[SimpleVirtualMap] ModuleInformation() except+ - cppstring extractExecutable() except+ - CoreCrashInfo 
extractFailureInfo() except+ - CorePsInfo extractPSInfo() except+ - vector[cppstring] missingModules() except+ - vector[CoreVirtualMap] extractMappedFiles() except+ diff --git a/src/pystack/_pystack/elf_common.pxd b/src/pystack/_pystack/elf_common.pxd deleted file mode 100644 index 233ee822..00000000 --- a/src/pystack/_pystack/elf_common.pxd +++ /dev/null @@ -1,22 +0,0 @@ -from libc.stdint cimport uintptr_t -from libcpp.string cimport string as cppstring - - -cdef extern from "elf_common.h" namespace "pystack": - cdef cppclass ProcessAnalyzer: - ProcessAnalyzer(int pid) except+ - - cdef cppclass CoreFileAnalyzer: - CoreFileAnalyzer(cppstring filename) except+ - CoreFileAnalyzer(cppstring filename, cppstring executable) except+ - CoreFileAnalyzer(cppstring filename, cppstring executable, cppstring lib_search_path) except+ - - struct SectionInfo: - cppstring name - cppstring flags - uintptr_t addr - uintptr_t corrected_addr - size_t offset - size_t size - - int getSectionInfo(const cppstring& filename, const cppstring& section_name, SectionInfo* result) except+ diff --git a/src/pystack/_pystack/logging.cpp b/src/pystack/_pystack/logging.cpp index 4ac13a73..b0a3cb74 100644 --- a/src/pystack/_pystack/logging.cpp +++ b/src/pystack/_pystack/logging.cpp @@ -1,29 +1,101 @@ #include #include -#include "../_pystack_api.h" +#define PY_SSIZE_T_CLEAN +#include + #include "logging.h" namespace pystack { +static PyObject* g_logger = nullptr; static int LOGGER_INITIALIZED = false; void initializePythonLoggerInterface() { - import_pystack___pystack(); + if (LOGGER_INITIALIZED) { + return; + } + + // Import the logging module and get a logger + PyObject* logging_module = PyImport_ImportModule("logging"); + if (!logging_module) { + PyErr_Print(); + throw std::runtime_error("Failed to import logging module"); + } + + PyObject* getLogger = PyObject_GetAttrString(logging_module, "getLogger"); + if (!getLogger) { + Py_DECREF(logging_module); + PyErr_Print(); + throw 
std::runtime_error("Failed to get logging.getLogger"); + } + + // Get logger for pystack._pystack + PyObject* logger_name = PyUnicode_FromString("pystack._pystack"); + g_logger = PyObject_CallFunctionObjArgs(getLogger, logger_name, NULL); + Py_DECREF(logger_name); + Py_DECREF(getLogger); + Py_DECREF(logging_module); + + if (!g_logger) { + PyErr_Print(); + throw std::runtime_error("Failed to create logger"); + } + LOGGER_INITIALIZED = true; } void logWithPython(const std::string& message, int level) { - if (!LOGGER_INITIALIZED) { - throw std::runtime_error("Logger is not initialized"); + if (!LOGGER_INITIALIZED || !g_logger) { + return; + } + + if (PyErr_Occurred()) { + return; + } + + // Get the log method name based on level + const char* method_name; + switch (level) { + case DEBUG: + method_name = "debug"; + break; + case INFO: + method_name = "info"; + break; + case WARNING: + method_name = "warning"; + break; + case ERROR: + method_name = "error"; + break; + case CRITICAL: + method_name = "critical"; + break; + default: + method_name = "info"; + break; } - if (!PyErr_Occurred()) { - log_with_python(&message, level); + + // Call the log method + PyObject* py_message = PyUnicode_FromString(message.c_str()); + if (!py_message) { + PyErr_Clear(); + return; + } + + PyObject* result = PyObject_CallMethod(g_logger, method_name, "O", py_message); + Py_DECREF(py_message); + + if (!result) { + PyErr_Clear(); + return; } + Py_DECREF(result); } } // namespace pystack diff --git a/src/pystack/_pystack/logging.pxd b/src/pystack/_pystack/logging.pxd deleted file mode 100644 index 5be77567..00000000 --- a/src/pystack/_pystack/logging.pxd +++ /dev/null @@ -1,2 +0,0 @@ -cdef extern from "logging.h" namespace "pystack": - void initializePythonLoggerInterface() diff --git a/src/pystack/_pystack/maps_parser.cpp b/src/pystack/_pystack/maps_parser.cpp new file mode 100644 index 00000000..feeb1fd8 --- /dev/null +++ b/src/pystack/_pystack/maps_parser.cpp @@ -0,0 +1,357 @@ +#include 
"maps_parser.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "logging.h" + +namespace pystack { + +namespace fs = std::filesystem; + +// Regex pattern for parsing /proc/pid/maps lines +// Format: start-end permissions offset dev inode pathname +static const std::regex MAPS_REGEXP( + R"(([0-9a-f]+)-([0-9a-f]+)\s+(.{4})\s+([0-9a-f]+)\s+([0-9a-f]+:[0-9a-f]+)\s+(\d+)\s*(.*)?)"); + +std::vector +parseProcMaps(pid_t pid) +{ + std::vector maps; + std::string maps_path = "/proc/" + std::to_string(pid) + "/maps"; + + std::ifstream maps_file(maps_path); + if (!maps_file.is_open()) { + throw std::runtime_error("No such process id: " + std::to_string(pid)); + } + + std::string line; + while (std::getline(maps_file, line)) { + std::smatch match; + if (!std::regex_match(line, match, MAPS_REGEXP)) { + LOG(DEBUG) << "Line cannot be recognized: " << line; + continue; + } + + uintptr_t start = std::stoull(match[1].str(), nullptr, 16); + uintptr_t end = std::stoull(match[2].str(), nullptr, 16); + std::string permissions = match[3].str(); + unsigned long offset = std::stoul(match[4].str(), nullptr, 16); + std::string device = match[5].str(); + unsigned long inode = std::stoul(match[6].str()); + std::string pathname = match[7].str(); + + size_t start_pos = pathname.find_first_not_of(" \t"); + if (start_pos != std::string::npos) { + pathname = pathname.substr(start_pos); + } else { + pathname = ""; + } + + maps.emplace_back( + start, + end, + end - start, // filesize + permissions, + offset, + device, + inode, + pathname); + } + + return maps; +} + +std::vector +parseCoreFileMaps( + const std::vector& mapped_files, + const std::vector& memory_maps) +{ + std::set> memory_map_ranges; + for (const auto& map : memory_maps) { + memory_map_ranges.insert({map.start, map.end}); + } + + std::vector missing_mapped_files; + for (const auto& map : mapped_files) { + if (memory_map_ranges.find({map.start, map.end}) == 
memory_map_ranges.end()) { + missing_mapped_files.push_back(map); + } + } + + std::vector all_maps; + all_maps.reserve(memory_maps.size() + missing_mapped_files.size()); + all_maps.insert(all_maps.end(), memory_maps.begin(), memory_maps.end()); + all_maps.insert(all_maps.end(), missing_mapped_files.begin(), missing_mapped_files.end()); + + std::sort(all_maps.begin(), all_maps.end(), [](const CoreVirtualMap& a, const CoreVirtualMap& b) { + return a.start < b.start; + }); + + std::set missing_map_paths; + for (const auto& map : missing_mapped_files) { + if (!map.path.empty()) { + try { + missing_map_paths.insert(fs::canonical(map.path).string()); + } catch (...) { + missing_map_paths.insert(map.path); + } + } + } + + std::unordered_map file_maps; + for (const auto& map : memory_maps) { + if (map.path.empty()) { + continue; + } + try { + std::string resolved_path = fs::canonical(map.path).string(); + if (missing_map_paths.count(resolved_path)) { + file_maps[resolved_path] = map.path; + } + } catch (...) 
{ + // Ignore errors resolving paths + } + } + + std::vector result; + result.reserve(all_maps.size()); + for (const auto& elem : all_maps) { + std::string path = elem.path; + if (!path.empty()) { + auto it = file_maps.find(path); + if (it != file_maps.end()) { + path = it->second; + } + } + result.emplace_back( + elem.start, + elem.end, + elem.filesize, + elem.flags, + elem.offset, + elem.device, + elem.inode, + path); + } + + return result; +} + +static VirtualMap +getBaseMap(const std::vector& binary_maps) +{ + for (const auto& map : binary_maps) { + if (!map.Path().empty()) { + return map; + } + } + if (!binary_maps.empty()) { + return binary_maps[0]; + } + throw std::runtime_error("No maps available"); +} + +static std::optional +getBss(const std::vector& elf_maps, uintptr_t load_point) +{ + if (elf_maps.empty()) { + return std::nullopt; + } + + VirtualMap binary_map = getBaseMap(elf_maps); + if (binary_map.Path().empty()) { + return std::nullopt; + } + + SectionInfo bss_info; + if (!getSectionInfo(binary_map.Path(), ".bss", &bss_info)) { + return std::nullopt; + } + + uintptr_t start = load_point + bss_info.corrected_addr; + LOG(INFO) << "Determined exact addr of .bss section: " << std::hex << start << " (" << load_point + << " + " << bss_info.corrected_addr << ")" << std::dec; + + unsigned long offset = 0; + + const VirtualMap* first_matching_map = nullptr; + for (const auto& map : elf_maps) { + if (map.containsAddr(start)) { + first_matching_map = ↦ + break; + } + } + + if (!first_matching_map) { + return std::nullopt; + } + + offset = first_matching_map->Offset() + (start - first_matching_map->Start()); + + return VirtualMap( + start, + start + bss_info.size, + bss_info.size, + "", // flags + offset, // offset + "", // device + 0, // inode + ""); // path +} + +ProcessMemoryMapInfo +parseMapInformation( + const std::string& binary, + const std::vector& maps, + const std::unordered_map* load_point_by_module) +{ + std::unordered_map> maps_by_library; + 
std::string current_lib; + + std::unordered_map computed_load_points; + if (!load_point_by_module) { + for (const auto& map : maps) { + if (!map.Path().empty()) { + std::string name = fs::path(map.Path()).filename().string(); + if (computed_load_points.find(name) == computed_load_points.end()) { + computed_load_points[name] = map.Start(); + } else { + computed_load_points[name] = std::min(computed_load_points[name], map.Start()); + } + } + } + load_point_by_module = &computed_load_points; + } + + for (const auto& memory_range : maps) { + std::string path_name; + if (!memory_range.Path().empty()) { + path_name = fs::path(memory_range.Path()).filename().string(); + current_lib = path_name; + } else { + path_name = current_lib; + } + maps_by_library[path_name].push_back(memory_range); + } + + std::string binary_name = fs::path(binary).filename().string(); + + auto python_it = maps_by_library.find(binary_name); + if (python_it == maps_by_library.end()) { + // Construct error message with available maps + std::ostringstream available; + for (const auto& map : maps) { + if (!map.Path().empty() && map.Path().find(".so") == std::string::npos) { + available << map.Path() << ", "; + } + } + std::string available_str = available.str(); + if (available_str.length() >= 2) { + available_str = available_str.substr(0, available_str.length() - 2); + } + throw std::runtime_error( + "Unable to find maps for the executable " + binary + + ". 
Available executable maps: " + available_str); + } + + const std::vector& binary_maps = python_it->second; + VirtualMap python = getBaseMap(binary_maps); + LOG(INFO) << "python binary first map found: " << python.Path(); + + std::optional libpython; + const std::vector* elf_maps = nullptr; + std::string libpython_name; + + std::vector libpython_binaries; + for (const auto& [lib_name, _] : maps_by_library) { + if (lib_name.find("libpython") != std::string::npos) { + libpython_binaries.push_back(lib_name); + } + } + + uintptr_t load_point = 0; + if (libpython_binaries.size() > 1) { + throw std::runtime_error( + "Unexpectedly found multiple libpython in process: " + + std::to_string(libpython_binaries.size())); + } else if (libpython_binaries.size() == 1) { + libpython_name = libpython_binaries[0]; + const auto& libpython_maps = maps_by_library[libpython_name]; + elf_maps = &libpython_maps; + auto load_it = load_point_by_module->find(libpython_name); + load_point = (load_it != load_point_by_module->end()) ? load_it->second : UINTPTR_MAX; + libpython = getBaseMap(libpython_maps); + LOG(INFO) << libpython_name << " first map found: " << libpython->Path(); + } else { + LOG(INFO) << "Process does not have a libpython.so, reading from binary"; + elf_maps = &binary_maps; + auto load_it = load_point_by_module->find(binary_name); + load_point = (load_it != load_point_by_module->end()) ? 
load_it->second : UINTPTR_MAX; + } + + std::optional heap; + auto heap_it = maps_by_library.find("[heap]"); + if (heap_it != maps_by_library.end() && !heap_it->second.empty()) { + heap = heap_it->second.front(); + LOG(INFO) << "Heap map found"; + } + + std::optional bss = getBss(*elf_maps, load_point); + if (!bss) { + for (const auto& map : *elf_maps) { + if (map.Path().empty() && map.Flags().find('r') != std::string::npos) { + bss = map; + break; + } + } + } + if (bss) { + LOG(INFO) << "bss map found"; + } + + return ProcessMemoryMapInfo{heap, bss, python, libpython}; +} + +ProcessMemoryMapInfo +parseMapInformationForProcess(pid_t pid, const std::vector& maps) +{ + std::string exe_link = "/proc/" + std::to_string(pid) + "/exe"; + char exe_path[PATH_MAX]; + ssize_t len = readlink(exe_link.c_str(), exe_path, sizeof(exe_path) - 1); + if (len == -1) { + throw std::runtime_error("Failed to read /proc/" + std::to_string(pid) + "/exe"); + } + exe_path[len] = '\0'; + return parseMapInformation(exe_path, maps); +} + +std::optional +getThreadName(pid_t pid, pid_t tid) +{ + std::string comm_path = "/proc/" + std::to_string(pid) + "/task/" + std::to_string(tid) + "/comm"; + std::ifstream comm_file(comm_path); + if (!comm_file.is_open()) { + return std::nullopt; + } + + std::string name; + std::getline(comm_file, name); + + size_t end = name.find_last_not_of(" \t\n\r"); + if (end != std::string::npos) { + name = name.substr(0, end + 1); + } + + return name; +} + +} // namespace pystack diff --git a/src/pystack/_pystack/maps_parser.h b/src/pystack/_pystack/maps_parser.h new file mode 100644 index 00000000..9678e9ce --- /dev/null +++ b/src/pystack/_pystack/maps_parser.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "corefile.h" +#include "elf_common.h" +#include "mem.h" + +namespace pystack { + +struct ProcessMemoryMapInfo +{ + std::optional heap; + std::optional bss; + VirtualMap python; + std::optional libpython; +}; + 
+std::vector +parseProcMaps(pid_t pid); + +std::vector +parseCoreFileMaps( + const std::vector& mapped_files, + const std::vector& memory_maps); + +ProcessMemoryMapInfo +parseMapInformation( + const std::string& binary, + const std::vector& maps, + const std::unordered_map* load_point_by_module = nullptr); + +ProcessMemoryMapInfo +parseMapInformationForProcess(pid_t pid, const std::vector& maps); + +std::optional +getThreadName(pid_t pid, pid_t tid); + +} // namespace pystack diff --git a/src/pystack/_pystack/mem.cpp b/src/pystack/_pystack/mem.cpp index 825f3c62..44c6aac6 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -113,49 +113,6 @@ VirtualMap::Size() const return d_end - d_start; } -MemoryMapInformation::MemoryMapInformation() -: d_main_map(std::nullopt) -, d_bss(std::nullopt) -, d_heap(std::nullopt) -{ -} - -const std::optional& -MemoryMapInformation::MainMap() -{ - return d_main_map; -} - -const std::optional& -MemoryMapInformation::Bss() -{ - return d_bss; -} - -const std::optional& -MemoryMapInformation::Heap() -{ - return d_heap; -} - -void -MemoryMapInformation::setMainMap(const VirtualMap& main_map) -{ - d_main_map = main_map; -} - -void -MemoryMapInformation::setBss(const VirtualMap& bss) -{ - d_bss = bss; -} - -void -MemoryMapInformation::setHeap(const VirtualMap& heap) -{ - d_heap = heap; -} - LRUCache::LRUCache(size_t capacity) : d_cache_capacity(capacity) , d_size(0){}; diff --git a/src/pystack/_pystack/mem.h b/src/pystack/_pystack/mem.h index b78b6674..64409ebc 100644 --- a/src/pystack/_pystack/mem.h +++ b/src/pystack/_pystack/mem.h @@ -81,6 +81,27 @@ class VirtualMap // Methods bool containsAddr(remote_addr_t addr) const; + // Permission helpers + bool isExecutable() const + { + return d_flags.find('x') != std::string::npos; + } + + bool isReadable() const + { + return d_flags.find('r') != std::string::npos; + } + + bool isWritable() const + { + return d_flags.find('w') != std::string::npos; + } + + bool 
isPrivate() const + { + return d_flags.find('p') != std::string::npos; + } + private: // Data members uintptr_t d_start{}; @@ -93,29 +114,6 @@ class VirtualMap std::string d_path{}; }; -class MemoryMapInformation -{ - public: - MemoryMapInformation(); - - // Getters - const std::optional& MainMap(); - const std::optional& Bss(); - const std::optional& Heap(); - - // Setters - - void setMainMap(const VirtualMap& main_map); - void setBss(const VirtualMap& bss); - void setHeap(const VirtualMap& heap); - - private: - // Data members - std::optional d_main_map; - std::optional d_bss; - std::optional d_heap; -}; - class LRUCache { private: diff --git a/src/pystack/_pystack/mem.pxd b/src/pystack/_pystack/mem.pxd deleted file mode 100644 index 02cfcce3..00000000 --- a/src/pystack/_pystack/mem.pxd +++ /dev/null @@ -1,33 +0,0 @@ -from libc.stdint cimport uintptr_t -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "mem.h" namespace "pystack": - ctypedef uintptr_t remote_addr_t - - cdef cppclass AbstractRemoteMemoryManager: - ssize_t copyMemoryFromProcess(remote_addr_t addr, size_t size, void *destination) except+ - - cdef cppclass ProcessMemoryManager(AbstractRemoteMemoryManager): - ProcessMemoryManager(int pid) except+ - ssize_t copyMemoryFromProcess(remote_addr_t addr, size_t size, void *destination) except+ - - - struct SimpleVirtualMap: - uintptr_t start - uintptr_t end - cppstring filename - cppstring buildid - - cdef cppclass VirtualMap: - VirtualMap() - VirtualMap(uintptr_t start, uintptr_t end, unsigned long filesize, - cppstring flags, unsigned long offset, cppstring permissions, - unsigned long inode, cppstring pathname) - - cdef cppclass MemoryMapInformation: - MemoryMapInformation() - void setMainMap(const VirtualMap& bss) - void setBss(const VirtualMap& bss) - void setHeap(const VirtualMap& heap) diff --git a/src/pystack/_pystack/native_frame.pxd b/src/pystack/_pystack/native_frame.pxd deleted file mode 
100644 index 4aa13188..00000000 --- a/src/pystack/_pystack/native_frame.pxd +++ /dev/null @@ -1,11 +0,0 @@ -from libcpp.string cimport string as cppstring - - -cdef extern from "native_frame.h" namespace "pystack": - struct NativeFrame: - unsigned long address - cppstring symbol - cppstring path - int linenumber - int colnumber - cppstring library diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 52ec0099..fcf62e45 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -13,6 +14,7 @@ #include "corefile.h" #include "logging.h" +#include "maps_parser.h" #include "mem.h" #include "native_frame.h" #include "process.h" @@ -22,6 +24,7 @@ #include "pythread.h" #include "pytypes.h" #include "version.h" +#include "version_detector.h" namespace { @@ -63,6 +66,19 @@ class DirectoryReader } // namespace namespace pystack { +namespace fs = std::filesystem; + +namespace { + +// Helper to extract the main interpreter map from ProcessMemoryMapInfo +std::optional +getMainMap(const ProcessMemoryMapInfo& map_info) +{ + return map_info.libpython ? 
*map_info.libpython : map_info.python; +} + +} // namespace + namespace { // unnamed struct ParsedPyVersion @@ -213,21 +229,35 @@ ProcessTracer::getTids() const AbstractProcessManager::AbstractProcessManager( pid_t pid, std::vector&& memory_maps, - MemoryMapInformation&& map_info) + std::optional main_map, + std::optional bss, + std::optional heap) : d_pid(pid) -, d_memory_maps(memory_maps) +, d_main_map(std::move(main_map)) +, d_bss(std::move(bss)) +, d_heap(std::move(heap)) +, d_memory_maps(std::move(memory_maps)) , d_manager(nullptr) , d_unwinder(nullptr) , d_analyzer(nullptr) { - d_main_map = map_info.MainMap(); - d_bss = map_info.Bss(); - d_heap = map_info.Heap(); if (!d_main_map) { throw std::runtime_error("The main interpreter map could not be located"); } } +const std::vector& +AbstractProcessManager::MemoryMaps() const +{ + return d_memory_maps; +} + +std::pair +AbstractProcessManager::Version() const +{ + return std::make_pair(d_major, d_minor); +} + bool AbstractProcessManager::isValidDictionaryObject(remote_addr_t addr) const { @@ -1361,17 +1391,49 @@ AbstractProcessManager::findInterpreterStateFromDebugOffsets() const return 0; } +std::shared_ptr +ProcessManager::create(pid_t pid, bool stop_process) +{ + std::shared_ptr tracer; + if (stop_process) { + tracer = std::make_shared(pid); + } + + auto virtual_maps = parseProcMaps(pid); + auto map_info = parseMapInformationForProcess(pid, virtual_maps); + auto analyzer = std::make_shared(pid); + + auto manager = std::make_shared( + pid, + tracer, + analyzer, + std::move(virtual_maps), + getMainMap(map_info), + map_info.bss, + map_info.heap); + + manager->initializeVersion(pid, map_info); + return manager; +} + ProcessManager::ProcessManager( pid_t pid, const std::shared_ptr& tracer, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info) -: AbstractProcessManager(pid, std::move(memory_maps), std::move(map_info)) -, tracer(tracer) + std::optional main_map, + std::optional 
bss, + std::optional heap) +: AbstractProcessManager( + pid, + std::move(memory_maps), + std::move(main_map), + std::move(bss), + std::move(heap)) +, d_tracer(tracer) { - if (tracer) { - d_tids = tracer->getTids(); + if (d_tracer) { + d_tids = d_tracer->getTids(); } else { d_tids = getProcessTids(pid); } @@ -1380,25 +1442,103 @@ ProcessManager::ProcessManager( d_unwinder = std::make_unique(analyzer); } +void +ProcessManager::initializeVersion(pid_t pid, const ProcessMemoryMapInfo& map_info) +{ + // Try to get version from debug offsets first + setPythonVersionFromDebugOffsets(); + auto python_version = findPythonVersion(); + + // Fallback to external version detection if needed + if (python_version.first == -1 && python_version.second == -1) { + python_version = getVersionForProcess(pid, map_info, d_manager.get()); + } + + setPythonVersion(python_version); +} + const std::vector& ProcessManager::Tids() const { return d_tids; } +std::shared_ptr +CoreFileProcessManager::create( + const std::string& core_file, + const std::string& executable, + const std::optional& lib_search_path) +{ + std::shared_ptr analyzer; + if (lib_search_path) { + analyzer = std::make_shared(core_file, executable, *lib_search_path); + } else { + analyzer = std::make_shared(core_file, executable); + } + + auto extractor = std::make_unique(analyzer); + + auto mapped_files = extractor->extractMappedFiles(); + auto memory_maps = extractor->MemoryMaps(); + + std::unordered_map load_point_by_module; + for (const auto& mod : extractor->ModuleInformation()) { + auto name = fs::path(mod.filename).filename().string(); + load_point_by_module[name] = mod.start; + } + + auto virtual_maps = parseCoreFileMaps(mapped_files, memory_maps); + pid_t pid = extractor->Pid(); + auto map_info = parseMapInformation(executable, virtual_maps, &load_point_by_module); + + auto manager = std::make_shared( + pid, + analyzer, + std::move(virtual_maps), + getMainMap(map_info), + map_info.bss, + map_info.heap); + + 
manager->initializeVersion(core_file, map_info); + return manager; +} + CoreFileProcessManager::CoreFileProcessManager( pid_t pid, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info) -: AbstractProcessManager(pid, std::move(memory_maps), std::move(map_info)) + std::optional main_map, + std::optional bss, + std::optional heap) +: AbstractProcessManager( + pid, + std::move(memory_maps), + std::move(main_map), + std::move(bss), + std::move(heap)) { d_analyzer = analyzer; d_manager = std::make_unique(analyzer, d_memory_maps); - d_executable = analyzer->d_executable; - std::unique_ptr the_unwinder = std::make_unique(analyzer); - d_tids = the_unwinder->getCoreTids(); - d_unwinder = std::move(the_unwinder); + auto unwinder = std::make_unique(analyzer); + d_tids = unwinder->getCoreTids(); + d_unwinder = std::move(unwinder); +} + +void +CoreFileProcessManager::initializeVersion( + const std::string& core_file, + const ProcessMemoryMapInfo& map_info) +{ + // Try to get version from debug offsets first + setPythonVersionFromDebugOffsets(); + auto python_version = findPythonVersion(); + + // Fallback to external version detection if needed + if (python_version.first == -1 && python_version.second == -1) { + python_version = getVersionForCore(core_file, map_info); + } + + setPythonVersion(python_version); } const std::vector& diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index e8554b02..0f52ff3c 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include #include @@ -12,6 +10,7 @@ #include #include "elf_common.h" +#include "maps_parser.h" #include "mem.h" #include "native_frame.h" #include "pycompat.h" @@ -63,11 +62,15 @@ class AbstractProcessManager : public std::enable_shared_from_this&& memory_maps, - MemoryMapInformation&& map_info); + std::optional main_map, + std::optional bss, + std::optional heap); // Getters pid_t Pid() 
const; virtual const std::vector& Tids() const = 0; + const std::vector& MemoryMaps() const; + std::pair Version() const; remote_addr_t getAddressFromCache(const std::string& symbol) const; void registerAddressInCache(const std::string& symbol, remote_addr_t address) const; @@ -144,13 +147,18 @@ AbstractProcessManager::copyObjectFromProcess(remote_addr_t addr, T* destination class ProcessManager : public AbstractProcessManager { public: + // Factory method + static std::shared_ptr create(pid_t pid, bool stop_process = true); + // Constructors ProcessManager( pid_t pid, const std::shared_ptr& tracer, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info); + std::optional main_map, + std::optional bss, + std::optional heap); // Destructors virtual ~ProcessManager() = default; @@ -160,19 +168,30 @@ class ProcessManager : public AbstractProcessManager private: // Data members - std::shared_ptr tracer; + std::shared_ptr d_tracer; std::vector d_tids; + + // Methods + void initializeVersion(pid_t pid, const ProcessMemoryMapInfo& map_info); }; class CoreFileProcessManager : public AbstractProcessManager { public: + // Factory method + static std::shared_ptr + create(const std::string& core_file, + const std::string& executable, + const std::optional& lib_search_path = std::nullopt); + // Constructors CoreFileProcessManager( pid_t pid, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info); + std::optional main_map, + std::optional bss, + std::optional heap); // Destructors virtual ~CoreFileProcessManager() = default; @@ -183,6 +202,8 @@ class CoreFileProcessManager : public AbstractProcessManager private: // Data members std::vector d_tids; - std::optional d_executable; + + // Methods + void initializeVersion(const std::string& core_file, const ProcessMemoryMapInfo& map_info); }; } // namespace pystack diff --git a/src/pystack/_pystack/process.pxd b/src/pystack/_pystack/process.pxd deleted file mode 
100644 index ede12d58..00000000 --- a/src/pystack/_pystack/process.pxd +++ /dev/null @@ -1,41 +0,0 @@ -from _pystack.elf_common cimport CoreFileAnalyzer -from _pystack.elf_common cimport ProcessAnalyzer -from _pystack.mem cimport MemoryMapInformation -from _pystack.mem cimport VirtualMap -from _pystack.mem cimport remote_addr_t -from libc.stdint cimport uintptr_t -from libcpp.memory cimport shared_ptr -from libcpp.string cimport string as cppstring -from libcpp.utility cimport pair -from libcpp.vector cimport vector - - -cdef extern from "process.h" namespace "pystack::AbstractProcessManager": - cdef enum InterpreterStatus: - RUNNING - FINALIZED - UNKNOWN - -cdef extern from "process.h" namespace "pystack": - cdef cppclass ProcessTracer: - pass - - cdef cppclass AbstractProcessManager: - remote_addr_t scanBSS() except+ - remote_addr_t scanHeap() except+ - remote_addr_t scanAllAnonymousMaps() except+ - remote_addr_t findInterpreterStateFromDebugOffsets() except+ - remote_addr_t findInterpreterStateFromSymbols() except+ - remote_addr_t findInterpreterStateFromElfData() except+ - ssize_t copyMemoryFromProcess(remote_addr_t addr, ssize_t size, void *destination) except+ - vector[int] Tids() except+ - InterpreterStatus isInterpreterActive() except+ - pair[int, int] findPythonVersion() - void setPythonVersion(pair[int, int] version) except + - void setPythonVersionFromDebugOffsets() except + - - cdef cppclass ProcessManager(AbstractProcessManager): - ProcessManager(int pid, shared_ptr[ProcessTracer] tracer, shared_ptr[ProcessAnalyzer] analyzer, vector[VirtualMap] memory_maps, MemoryMapInformation map_info) except+ - - cdef cppclass CoreFileProcessManager(AbstractProcessManager): - CoreFileProcessManager(int pid, shared_ptr[CoreFileAnalyzer] analyzer, vector[VirtualMap] memory_maps, MemoryMapInformation map_info) except+ diff --git a/src/pystack/_pystack/pycode.pxd b/src/pystack/_pystack/pycode.pxd deleted file mode 100644 index 2596f87f..00000000 --- 
a/src/pystack/_pystack/pycode.pxd +++ /dev/null @@ -1,17 +0,0 @@ -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "pycode.h" namespace "pystack": - cdef struct LocationInfo: - int lineno - int end_lineno - int column - int end_column - - cdef cppclass CodeObject: - cppstring Filename() - cppstring Scope() - LocationInfo Location() - int NArguments() - const vector[cppstring] Varnames() diff --git a/src/pystack/_pystack/pyframe.cpp b/src/pystack/_pystack/pyframe.cpp index 62769529..70037b1c 100644 --- a/src/pystack/_pystack/pyframe.cpp +++ b/src/pystack/_pystack/pyframe.cpp @@ -19,6 +19,7 @@ FrameObject::FrameObject( { LOG(DEBUG) << "Copying frame number " << frame_no; LOG(DEBUG) << std::hex << std::showbase << "Copying frame struct from address " << addr; + Structure frame(manager, addr); d_addr = addr; @@ -36,7 +37,15 @@ FrameObject::FrameObject( auto prev_addr = frame.getField(&py_frame_v::o_back); LOG(DEBUG) << std::hex << std::showbase << "Previous frame address: " << prev_addr; if (prev_addr) { - d_prev = std::make_shared(manager, prev_addr, next_frame_no); + try { + d_prev = std::make_shared(manager, prev_addr, next_frame_no); + } catch (const RemoteMemCopyError& ex) { + // The previous frame address points to unreadable memory (e.g., guard page, + // unmapped region). Treat this as the end of the frame chain. 
+ LOG(DEBUG) << "Failed to read previous frame at " << std::hex << std::showbase << prev_addr + << ", treating as end of frame chain: " << ex.what(); + d_prev = nullptr; + } } d_is_entry = isEntry(manager, frame); } diff --git a/src/pystack/_pystack/pyframe.pxd b/src/pystack/_pystack/pyframe.pxd deleted file mode 100644 index 7cad98e7..00000000 --- a/src/pystack/_pystack/pyframe.pxd +++ /dev/null @@ -1,19 +0,0 @@ -from _pystack.pycode cimport CodeObject -from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string as cppstring -from libcpp.unordered_map cimport unordered_map - - -cdef extern from "pyframe.h" namespace "pystack": - - cdef cppclass FrameObject: - ssize_t FrameNo() - unique_ptr[FrameObject] PreviousFrame() - unique_ptr[CodeObject] Code() - unordered_map[cppstring, cppstring] Arguments() - unordered_map[cppstring, cppstring] Locals() - bool IsEntryFrame() - bool IsShim() - - void resolveLocalVariables() except+ diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index d50e4126..3607a791 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -71,7 +71,11 @@ findPthreadTidOffset( offsetof(_pthread_structure_with_simple_header, tid), offsetof(_pthread_structure_with_tcbhead, tid)}; for (off_t candidate : glibc_pthread_offset_candidates) { - manager->copyObjectFromProcess((remote_addr_t)(pthread_id_addr + candidate), &the_tid); + try { + manager->copyObjectFromProcess((remote_addr_t)(pthread_id_addr + candidate), &the_tid); + } catch (const RemoteMemCopyError& ex) { + continue; + } if (the_tid == manager->Pid()) { LOG(DEBUG) << "Tid offset located using GLIBC offsets at offset " << std::showbase << std::hex << candidate << " in pthread structure"; diff --git a/src/pystack/_pystack/pythread.pxd b/src/pystack/_pystack/pythread.pxd deleted file mode 100644 index 3930a6de..00000000 --- a/src/pystack/_pystack/pythread.pxd +++ /dev/null @@ -1,37 +0,0 @@ -from 
_pystack.native_frame cimport NativeFrame -from _pystack.process cimport AbstractProcessManager -from _pystack.process cimport remote_addr_t -from _pystack.pyframe cimport FrameObject -from libcpp.memory cimport shared_ptr -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "pythread.h" namespace "pystack": - cdef cppclass NativeThread "pystack::Thread": - NativeThread(int, int) except+ - int Tid() - vector[NativeFrame]& NativeFrames() - void populateNativeStackTrace(shared_ptr[AbstractProcessManager] manager) except+ - -cdef extern from "pythread.h" namespace "pystack::PyThread": - cdef enum GilStatus: - UNKNOWN = -1 - NOT_HELD = 0 - HELD = 1 - - cdef enum GCStatus: - COLLECTING_UNKNOWN = -1 - NOT_COLLECTING = 0 - COLLECTING = 1 - -cdef extern from "pythread.h" namespace "pystack": - cdef cppclass Thread "pystack::PyThread": - int Tid() - shared_ptr[FrameObject] FirstFrame() - shared_ptr[Thread] NextThread() - vector[NativeFrame]& NativeFrames() - GilStatus isGilHolder() - GCStatus isGCCollecting() - void populateNativeStackTrace(shared_ptr[AbstractProcessManager] manager) except+ - shared_ptr[Thread] getThreadFromInterpreterState(shared_ptr[AbstractProcessManager] manager, remote_addr_t addr) except+ diff --git a/src/pystack/_pystack/thread_builder.cpp b/src/pystack/_pystack/thread_builder.cpp new file mode 100644 index 00000000..629e502a --- /dev/null +++ b/src/pystack/_pystack/thread_builder.cpp @@ -0,0 +1,193 @@ +#include "thread_builder.h" + +#include "logging.h" +#include "maps_parser.h" + +namespace pystack { + +// StackMethod flags (must match Python enum values) +enum StackMethodFlag { + METHOD_ELF_DATA = 1 << 0, + METHOD_SYMBOLS = 1 << 1, + METHOD_BSS = 1 << 2, + METHOD_ANONYMOUS_MAPS = 1 << 3, + METHOD_HEAP = 1 << 4, + METHOD_DEBUG_OFFSETS = 1 << 5, +}; + +std::vector +buildFrameStack(FrameObject* first_frame, bool resolve_locals) +{ + std::vector frames; + FrameObject* current_frame = first_frame; + 
+ while (current_frame != nullptr) { + auto code = current_frame->Code(); + // Skip frames without code (shim frames) or with unreadable code ("???") + if (!code || code->Filename() == "???") { + auto prev = current_frame->PreviousFrame(); + current_frame = prev.get(); + continue; + } + + if (resolve_locals) { + current_frame->resolveLocalVariables(); + } + + PyFrameData frame_data; + frame_data.code.filename = code->Filename(); + frame_data.code.scope = code->Scope(); + frame_data.code.location = code->Location(); + frame_data.arguments = current_frame->Arguments(); + frame_data.locals = current_frame->Locals(); + frame_data.is_entry = current_frame->IsEntryFrame(); + frame_data.is_shim = current_frame->IsShim(); + + frames.push_back(std::move(frame_data)); + + auto prev = current_frame->PreviousFrame(); + current_frame = prev.get(); + } + + return frames; +} + +PyThreadData +buildPythonThread( + const std::shared_ptr& manager, + PyThread* thread, + pid_t pid, + bool add_native_traces, + bool resolve_locals) +{ + PyThreadData data; + data.tid = thread->Tid(); + data.name = getThreadName(pid, thread->Tid()); + + LOG(INFO) << "Constructing new Python thread with tid " << data.tid; + + if (add_native_traces) { + thread->populateNativeStackTrace(manager); + } + + auto first_frame = thread->FirstFrame(); + if (first_frame) { + data.frames = buildFrameStack(first_frame.get(), resolve_locals); + } + + const auto& native_frames = thread->NativeFrames(); + data.native_frames.assign(native_frames.rbegin(), native_frames.rend()); + + data.gil_status = static_cast(thread->isGilHolder()); + data.gc_status = static_cast(thread->isGCCollecting()); + + return data; +} + +PyThreadData +buildNativeThread(const std::shared_ptr& manager, pid_t pid, pid_t tid) +{ + PyThreadData data; + data.tid = tid; + data.name = getThreadName(pid, tid); + data.gil_status = 0; // NOT_HELD + data.gc_status = 0; // NOT_COLLECTING + + LOG(INFO) << "Constructing new native thread with tid " << tid; + + 
Thread native_thread(pid, tid); + native_thread.populateNativeStackTrace(manager); + + const auto& native_frames = native_thread.NativeFrames(); + data.native_frames.assign(native_frames.rbegin(), native_frames.rend()); + + return data; +} + +std::vector +buildThreadsFromInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_head, + pid_t pid, + bool add_native_traces, + bool resolve_locals) +{ + LOG(INFO) << "Fetching Python threads"; + std::vector threads; + + auto thread = getThreadFromInterpreterState(manager, interpreter_head); + PyThread* current_thread = thread.get(); + + while (current_thread != nullptr) { + threads.push_back( + buildPythonThread(manager, current_thread, pid, add_native_traces, resolve_locals)); + + auto next = current_thread->NextThread(); + current_thread = next.get(); + } + + return threads; +} + +remote_addr_t +getInterpreterStateAddr(AbstractProcessManager* manager, int method_flags) +{ + remote_addr_t head = 0; + + struct MethodInfo + { + int flag; + const char* name; + std::function func; + }; + + std::vector methods = { + {METHOD_DEBUG_OFFSETS, + "using debug offsets data", + [&]() { return manager->findInterpreterStateFromDebugOffsets(); }}, + {METHOD_ELF_DATA, + "using ELF data", + [&]() { return manager->findInterpreterStateFromElfData(); }}, + {METHOD_SYMBOLS, + "using symbols", + [&]() { return manager->findInterpreterStateFromSymbols(); }}, + {METHOD_BSS, "scanning the BSS", [&]() { return manager->scanBSS(); }}, + {METHOD_ANONYMOUS_MAPS, + "scanning all anonymous maps", + [&]() { return manager->scanAllAnonymousMaps(); }}, + {METHOD_HEAP, "scanning the heap", [&]() { return manager->scanHeap(); }}, + }; + + for (const auto& method : methods) { + if ((method_flags & method.flag) == 0) { + continue; + } + + try { + head = method.func(); + } catch (const std::exception& exc) { + LOG(WARNING) << "Unexpected error finding PyInterpreterState by " << method.name << ": " + << exc.what(); + continue; + } + + if 
(head != 0) { + LOG(INFO) << "PyInterpreterState found by " << method.name << " at address 0x" << std::hex + << head << std::dec; + return head; + } else { + LOG(INFO) << "Address of PyInterpreterState not found by " << method.name; + } + } + + LOG(INFO) << "Address of PyInterpreterState could not be found"; + return 0; +} + +std::vector +getThreadIds(const std::shared_ptr& manager) +{ + return manager->Tids(); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/thread_builder.h b/src/pystack/_pystack/thread_builder.h new file mode 100644 index 00000000..ac431387 --- /dev/null +++ b/src/pystack/_pystack/thread_builder.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "maps_parser.h" +#include "native_frame.h" +#include "process.h" +#include "pycode.h" +#include "pyframe.h" +#include "pythread.h" + +namespace pystack { + +struct PyCodeData +{ + std::string filename; + std::string scope; + LocationInfo location; +}; + +struct PyFrameData +{ + PyCodeData code; + std::unordered_map arguments; + std::unordered_map locals; + bool is_entry; + bool is_shim; +}; + +struct PyThreadData +{ + int tid; + std::optional name; + std::vector frames; + std::vector native_frames; + int gil_status; // -1 = unknown, 0 = not held, 1 = held + int gc_status; // -1 = unknown, 0 = not collecting, 1 = collecting +}; + +std::vector +buildThreadsFromInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_head, + pid_t pid, + bool add_native_traces, + bool resolve_locals); + +PyThreadData +buildPythonThread( + const std::shared_ptr& manager, + PyThread* thread, + pid_t pid, + bool add_native_traces, + bool resolve_locals); + +PyThreadData +buildNativeThread(const std::shared_ptr& manager, pid_t pid, pid_t tid); + +std::vector +buildFrameStack(FrameObject* first_frame, bool resolve_locals); + +remote_addr_t +getInterpreterStateAddr(AbstractProcessManager* manager, int method_flags); + +std::vector +getThreadIds(const 
std::shared_ptr& manager); + +} // namespace pystack diff --git a/src/pystack/_pystack/unwinder.cpp b/src/pystack/_pystack/unwinder.cpp index 2804c128..abc284b1 100644 --- a/src/pystack/_pystack/unwinder.cpp +++ b/src/pystack/_pystack/unwinder.cpp @@ -82,10 +82,25 @@ frameCallback(Dwfl_Frame* state, void* arg) Dwarf_Addr pc; bool isActivation; if (!dwfl_frame_pc(state, &pc, &isActivation)) { - LOG(DEBUG) << "dwfl_frame_pc failed"; + int dwfl_err = dwfl_errno(); + LOG(DEBUG) << "dwfl_frame_pc failed: " << (dwfl_err ? dwfl_errmsg(dwfl_err) : "no error"); + LOG(DEBUG) << "Total frames gathered before failure: " << frames->size(); return -1; } + // Get additional register info for debugging + Dwarf_Word sp = 0; +#if defined(__x86_64__) + // x86_64 stack pointer is register 7 + dwfl_frame_reg(state, 7, &sp); +#elif defined(__aarch64__) + // aarch64 stack pointer is register 31 + dwfl_frame_reg(state, 31, &sp); +#endif + + LOG(DEBUG) << std::hex << std::showbase << "frameCallback: pc=" << pc << " sp=" << sp + << " isActivation=" << isActivation << " frame_count=" << std::dec << frames->size(); + std::optional stackPointer; // Unwinding through musl libc with elfutils can get stuck returning the // same PC in a loop forever. 
@@ -308,6 +323,8 @@ AbstractUnwinder::gatherFrames(const std::vector& frames) const if (!raw_symname) { LOG(DEBUG) << std::hex << std::showbase << "Non-inline symbol name could not be resolved @ " << pc; + // Add frame with unknown symbol rather than skipping it + native_frames.push_back({pc, "???", mod_name, 0, 0, mod_name}); continue; } @@ -481,24 +498,34 @@ thread_callback_for_frames(Dwfl_Thread* thread, void* arg) { auto* thread_arg = static_cast(arg); pid_t tid = dwfl_thread_tid(thread); + LOG(DEBUG) << "thread_callback_for_frames: checking thread tid=" << tid << " (looking for " + << thread_arg->tid << ")"; if (tid != thread_arg->tid) { return DWARF_CB_OK; } - switch (dwfl_thread_getframes(thread, frameCallback, (void*)(&(thread_arg->frames)))) { + LOG(DEBUG) << "thread_callback_for_frames: found matching thread, calling dwfl_thread_getframes"; + int result = dwfl_thread_getframes(thread, frameCallback, (void*)(&(thread_arg->frames))); + LOG(DEBUG) << "thread_callback_for_frames: dwfl_thread_getframes returned " << result << ", got " + << thread_arg->frames.size() << " frames"; + + switch (result) { case DWARF_CB_OK: case DWARF_CB_ABORT: break; - case -1: + case -1: { // This may or may not be an error, as it can signal the end of the stack // unwinding. + int dwfl_err = dwfl_errno(); + LOG(DEBUG) << "thread_callback_for_frames: dwfl error: " + << (dwfl_err ? dwfl_errmsg(dwfl_err) : "no error"); if (thread_arg->frames.empty()) { - int dwfl_err = dwfl_errno(); std::string error( dwfl_err ? 
dwfl_errmsg(dwfl_err) : "unwinding failed with no error reported"); throw UnwinderError("Unknown error happened when gathering thread frames: " + error); } break; + } default: throw UnwinderError("Unknown error happened when gathering thread frames"); } diff --git a/src/pystack/_pystack/version_detector.cpp b/src/pystack/_pystack/version_detector.cpp new file mode 100644 index 00000000..8db20299 --- /dev/null +++ b/src/pystack/_pystack/version_detector.cpp @@ -0,0 +1,165 @@ +#include "version_detector.h" + +#include +#include +#include +#include +#include + +#include "logging.h" + +namespace pystack { + +namespace fs = std::filesystem; + +// Regex patterns for version detection +// Matches: "3.8.10 (default, May 26 2023, 14:05:08)" or similar version strings in BSS +static const std::regex BSS_VERSION_REGEXP( + R"(((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?(?: (?:experimental )?free-threading build)? (\(.{1,64}\)))"); + +// Matches: python3.8, python3.10, etc. +static const std::regex BINARY_REGEXP(R"(python(\d+)\.(\d+).*)", std::regex_constants::icase); + +// Matches: libpython3.8.so, libpython3.10.so.1.0, etc. +static const std::regex LIBPYTHON_REGEXP(R"(.*libpython(\d+)\.(\d+).*)", std::regex_constants::icase); + +static std::optional +scanProcessBssForVersion(pid_t pid, const VirtualMap& bss, AbstractRemoteMemoryManager* manager) +{ + if (!manager) { + return std::nullopt; + } + + size_t size = bss.Size(); + std::vector memory(size); + + try { + ssize_t bytes_read = manager->copyMemoryFromProcess(bss.Start(), size, memory.data()); + if (bytes_read < 0) { + return std::nullopt; + } + } catch (...) 
{ + return std::nullopt; + } + + std::string memory_str(memory.begin(), memory.end()); + std::smatch match; + if (std::regex_search(memory_str, match, BSS_VERSION_REGEXP)) { + int major = std::stoi(match[2].str()); + int minor = std::stoi(match[3].str()); + return PythonVersion(major, minor); + } + + return std::nullopt; +} + +static std::optional +scanCoreBssForVersion(const std::string& corefile, const VirtualMap& bss) +{ + std::ifstream file(corefile, std::ios::binary); + if (!file.is_open()) { + return std::nullopt; + } + + file.seekg(bss.Offset()); + if (!file.good()) { + return std::nullopt; + } + + size_t size = bss.Size(); + std::vector data(size); + file.read(data.data(), size); + if (!file.good() && !file.eof()) { + return std::nullopt; + } + + std::string data_str(data.begin(), data.end()); + std::smatch match; + if (std::regex_search(data_str, match, BSS_VERSION_REGEXP)) { + int major = std::stoi(match[2].str()); + int minor = std::stoi(match[3].str()); + return PythonVersion(major, minor); + } + + return std::nullopt; +} + +static std::optional +inferVersionFromPath(const std::string& path) +{ + std::string filename = fs::path(path).filename().string(); + + std::smatch match; + if (std::regex_match(filename, match, LIBPYTHON_REGEXP)) { + int major = std::stoi(match[1].str()); + int minor = std::stoi(match[2].str()); + LOG(INFO) << "Version inferred from libpython path: " << major << "." << minor; + return PythonVersion(major, minor); + } + + if (std::regex_match(filename, match, BINARY_REGEXP)) { + int major = std::stoi(match[1].str()); + int minor = std::stoi(match[2].str()); + LOG(INFO) << "Version inferred from binary path: " << major << "." 
<< minor; + return PythonVersion(major, minor); + } + + return std::nullopt; +} + +static PythonVersion +getVersionFromMapInfo(const ProcessMemoryMapInfo& mapinfo) +{ + if (mapinfo.libpython && !mapinfo.libpython->Path().empty()) { + LOG(INFO) << "Trying to extract version from filename: " << mapinfo.libpython->Path(); + auto version = inferVersionFromPath(mapinfo.libpython->Path()); + if (version) { + return *version; + } + } + + if (!mapinfo.python.Path().empty()) { + LOG(INFO) << "Trying to extract version from filename: " << mapinfo.python.Path(); + auto version = inferVersionFromPath(mapinfo.python.Path()); + if (version) { + return *version; + } + } + + throw std::runtime_error("Could not determine python version from " + mapinfo.python.Path()); +} + +PythonVersion +getVersionForProcess( + pid_t pid, + const ProcessMemoryMapInfo& mapinfo, + AbstractRemoteMemoryManager* manager) +{ + if (mapinfo.bss) { + auto version = scanProcessBssForVersion(pid, *mapinfo.bss, manager); + if (version) { + LOG(INFO) << "Version found by scanning the bss section: " << version->first << "." + << version->second; + return *version; + } + } + + return getVersionFromMapInfo(mapinfo); +} + +PythonVersion +getVersionForCore(const std::string& corefile, const ProcessMemoryMapInfo& mapinfo) +{ + if (mapinfo.bss) { + auto version = scanCoreBssForVersion(corefile, *mapinfo.bss); + if (version) { + LOG(INFO) << "Version found by scanning the bss section: " << version->first << "." 
+ << version->second; + return *version; + } + } + + return getVersionFromMapInfo(mapinfo); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/version_detector.h b/src/pystack/_pystack/version_detector.h new file mode 100644 index 00000000..403b8675 --- /dev/null +++ b/src/pystack/_pystack/version_detector.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +#include "maps_parser.h" +#include "mem.h" + +namespace pystack { + +using PythonVersion = std::pair; + +PythonVersion +getVersionForProcess( + pid_t pid, + const ProcessMemoryMapInfo& mapinfo, + AbstractRemoteMemoryManager* manager); + +PythonVersion +getVersionForCore(const std::string& corefile, const ProcessMemoryMapInfo& mapinfo); + +} // namespace pystack diff --git a/src/pystack/errors.py b/src/pystack/errors.py index 5bbd58d3..4dd615cc 100644 --- a/src/pystack/errors.py +++ b/src/pystack/errors.py @@ -1,6 +1,11 @@ import pathlib +from functools import wraps from typing import Any +from typing import Callable from typing import Optional +from typing import TypeVar + +F = TypeVar("F", bound=Callable[..., Any]) DETECTED_EXECUTABLE_NOT_FOUND_TEXT = """\ The executable that was automatically located by pystack doesn't exist. @@ -128,3 +133,19 @@ class InvalidExecutable(PystackError): class MissingExecutableMaps(PystackError): HELP_TEXT = MISSING_EXECUTABLE_MAPS_HELP_TEXT + + +def intercept_runtime_errors() -> Callable[[F], F]: + """Decorator that converts RuntimeError to EngineError.""" + + def decorator(func: F) -> F: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return func(*args, **kwargs) + except RuntimeError as e: + raise EngineError(str(e)) from e + + return wrapper # type: ignore[return-value] + + return decorator diff --git a/src/pystack/maps.py b/src/pystack/maps.py index eda70bcc..a95a43de 100644 --- a/src/pystack/maps.py +++ b/src/pystack/maps.py @@ -1,46 +1,17 @@ -import collections +"""Memory map data classes for process analysis. 
+ +This module provides data classes for representing memory maps. +The actual parsing is done in C++. +""" import dataclasses -import logging -import os -import re from pathlib import Path -from typing import Any -from typing import Dict -from typing import Iterable -from typing import List from typing import Optional -from .errors import MissingExecutableMaps -from .errors import ProcessNotFound -from .errors import PystackError - -LOGGER = logging.getLogger(__file__) - -MAPS_REGEXP = re.compile( - r""" - (?P[\da-f]+) - - - (?P[\da-f]+) - \s - (?P....) - \s - (?P[\da-f]+) - \s - (?P[\da-f][\da-f]+:[\da-f][\da-f]+) - \s - (?P\d+) - \s* - (?P.+)? - $ - """, - re.VERBOSE, -) - -RawCoreMapList = List[Dict[str, Any]] - @dataclasses.dataclass(frozen=True, eq=True) class VirtualMap: + """Represents a memory-mapped region in a process's virtual address space.""" + start: int end: int filesize: int @@ -51,22 +22,28 @@ class VirtualMap: path: Optional[Path] def contains(self, addr: int) -> bool: + """Check if the given address is within this memory map.""" return self.start <= addr < self.end def is_executable(self) -> bool: + """Check if this memory region is executable.""" return "x" in self.flags def is_readable(self) -> bool: + """Check if this memory region is readable.""" return "r" in self.flags def is_writable(self) -> bool: + """Check if this memory region is writable.""" return "w" in self.flags def is_private(self) -> bool: + """Check if this memory region is private (copy-on-write).""" return "p" in self.flags @property def size(self) -> int: + """Return the size of this memory region.""" return self.end - self.start def __repr__(self) -> str: @@ -84,244 +61,18 @@ def __repr__(self) -> str: @dataclasses.dataclass class MemoryRange: + """Represents a range of memory addresses.""" + min_addr: int max_addr: int @dataclasses.dataclass class MemoryMapInformation: + """Container for memory map information needed for process analysis.""" + memory: MemoryRange heap: 
Optional[VirtualMap] bss: Optional[VirtualMap] python: VirtualMap libpython: Optional[VirtualMap] - - -def _read_maps(pid: int) -> List[str]: - try: - with open(f"/proc/{pid}/maps") as maps: - return maps.readlines() - except FileNotFoundError: - raise ProcessNotFound(f"No such process id: {pid}") from None - - -def generate_maps_for_process(pid: int) -> Iterable[VirtualMap]: - proc_maps_lines = _read_maps(pid) - for index, line in enumerate(proc_maps_lines): - line = line.rstrip("\n") - match = MAPS_REGEXP.match(line) - if not match: - LOGGER.debug("Line %r cannot be recognized!", line) - continue - - path = match.group("pathname") - yield VirtualMap( - start=int(match.group("start"), 16), - end=int(match.group("end"), 16), - filesize=int(match.group("end"), 16) - int(match.group("start"), 16), - offset=int(match.group("offset"), 16), - device=match.group("dev"), - flags=match.group("permissions"), - inode=int(match.group("inode")), - path=Path(path) if path else None, - ) - - -def generate_maps_from_core_data( - mapped_files: RawCoreMapList, memory_maps: RawCoreMapList -) -> Iterable[VirtualMap]: - memory_map_ranges = {(map["start"], map["end"]) for map in memory_maps} - missing_mapped_files = [ - map - for map in mapped_files - if (map["start"], map["end"]) not in memory_map_ranges - ] - - all_maps: RawCoreMapList = sorted( - memory_maps + missing_mapped_files, key=lambda map: map["start"] - ) - - # Some paths in the mapped files can be absolute, but we need to work with the canonical - # paths that the linker reported, so we need to "unresolve" those path back to whatever - # the memory math paths are so we can properly group then together. For example, the map - # for the interpreter may be "/usr/bin/python" in the mapped files and "/venv/bin/python" - # in the memory maps. 
- missing_map_paths = { - Path(map["path"]) for map in missing_mapped_files if map is not None - } - file_maps = {} - for map in memory_maps: - if not map["path"]: - continue - the_path = Path(map["path"]) - resolved_path = the_path.resolve() - if resolved_path in missing_map_paths: - file_maps[resolved_path] = the_path - - for data_elem in all_maps: - path = Path(data_elem["path"]) if data_elem["path"] else None - if path is not None: - path = file_maps.get(path, path) - - yield VirtualMap( - start=data_elem["start"], - end=data_elem["end"], - filesize=data_elem["filesize"], - offset=data_elem["offset"], - device=data_elem["device"], - flags=data_elem["flags"], - inode=data_elem["inode"], - path=path, - ) - - -def parse_maps_file(pid: int, all_maps: Iterable[VirtualMap]) -> MemoryMapInformation: - binary_name = Path(os.readlink(f"/proc/{pid}/exe")) - return parse_maps_file_for_binary(binary_name, all_maps) - - -def _get_base_map(binary_maps: List[VirtualMap]) -> VirtualMap: - maybe_map = next( - (map for map in binary_maps if map.path is not None), - None, - ) - if maybe_map is not None: - return maybe_map - first_map, *_ = binary_maps - return first_map - - -def _get_bss(elf_maps: List[VirtualMap], load_point: int) -> Optional[VirtualMap]: - binary_map = _get_base_map(elf_maps) - if not binary_map or not binary_map.path: - return None - try: - from ._pystack import get_bss_info - except ImportError: # pragma: no cover - return None - bss_info = get_bss_info(binary_map.path) - if not bss_info: - return None - start = load_point + bss_info["corrected_addr"] - LOGGER.info( - "Determined exact addr of .bss section: %s (%s + %s)", - hex(start), - hex(load_point), - hex(bss_info["corrected_addr"]), - ) - offset = 0 - - # Calculate the offset based on the mapped files. The offset in core files - # is only present in the core (and not in the original ELF) so this - # operation allows us to correlate the bss section with some memory location - # within the core file. 
- first_matching_map = next((map for map in elf_maps if map.contains(start)), None) - if first_matching_map is None: - return None - - offset = first_matching_map.offset + (start - first_matching_map.start) - - bss = VirtualMap( - start=start, - end=start + bss_info["size"], - filesize=bss_info["size"], - offset=offset, - device="", - flags="", - inode=0, - path=None, - ) - return bss - - -def parse_maps_file_for_binary( - binary_name: Path, - all_maps_iter: Iterable[VirtualMap], - load_point_by_module: Optional[Dict[str, int]] = None, -) -> MemoryMapInformation: - min_addr = float("inf") - max_addr = 0 - maps_by_library: Dict[str, List[VirtualMap]] = collections.defaultdict(list) - current_lib = "" - all_maps = tuple(all_maps_iter) - - if load_point_by_module is None: - load_point_by_module = collections.defaultdict(lambda: 2**64) - for memory_range in all_maps: - if memory_range.path is not None: - load_point_by_module[memory_range.path.name] = min( - memory_range.start, - load_point_by_module[memory_range.path.name], - ) - - for memory_range in all_maps: - current_lib = ( - memory_range.path.name if memory_range.path is not None else current_lib - ) - maps_by_library[current_lib].append(memory_range) - - if memory_range.path is None or not memory_range.path.name.startswith("[v"): - min_addr = min(min_addr, memory_range.start) - max_addr = max(max_addr, memory_range.end) - maps_by_library = dict(maps_by_library) - - python = libpython = bss = heap = None - try: - binary_maps = maps_by_library[binary_name.name] - python = _get_base_map(binary_maps) - except KeyError: - LOGGER.debug("Unable to find maps for %r in %r", binary_name, maps_by_library) - available_maps = { - str(map.path) - for map in all_maps - if map.path is not None and ".so" not in map.path.name - } - LOGGER.debug("Available executable maps: %s", ", ".join(available_maps)) - if available_maps: - maps_txt = ", ".join(available_maps) - msg = f"These are the available executable memory maps: 
{maps_txt}" - else: - msg = "There are no available executable maps with known paths." - raise MissingExecutableMaps( - f"Unable to find maps for the executable {binary_name}. " + msg - ) - LOGGER.info("python binary first map found: %r", python) - - libpython_binaries = [lib for lib in maps_by_library if "libpython" in lib] - if len(libpython_binaries) > 1: - raise PystackError( - f"Unexpectedly found multiple libpython in process: {libpython_binaries}" - ) - elif len(libpython_binaries) == 1: - libpython_name = libpython_binaries[0] - libpython_maps = maps_by_library[libpython_name] - load_point = load_point_by_module[libpython_name] - elf_maps = libpython_maps - libpython = _get_base_map(libpython_maps) - LOGGER.info("%r first map found: %r", libpython_name, libpython) - else: - LOGGER.info("Process does not have a libpython.so, reading from binary") - elf_maps = binary_maps - libpython = None - load_point = load_point_by_module[binary_name.name] - - heap_maps = maps_by_library.get("[heap]") - if heap_maps is not None: - *_, heap = [m for m in heap_maps if getattr(m.path, "name", None) == "[heap]"] - LOGGER.info("Heap map found: %r", heap) - - bss = _get_bss(elf_maps, load_point) - if bss is None: - bss = ( - next( - (map for map in elf_maps if map.path is None and map.is_readable()), - None, - ) - if elf_maps - else None - ) - if bss: - LOGGER.info("bss map found: %r", bss) - - memory = MemoryRange(min_addr=int(min_addr), max_addr=int(max_addr)) - return MemoryMapInformation(memory, heap, bss, python, libpython) diff --git a/src/pystack/process.py b/src/pystack/process.py index c44407f9..50c13df2 100644 --- a/src/pystack/process.py +++ b/src/pystack/process.py @@ -1,129 +1,15 @@ +"""Process utility functions. + +This module provides utility functions for checking file types +and decompressing gzip files. 
+""" import gzip -import logging import pathlib -import re -import subprocess import tempfile -from typing import Optional -from typing import Tuple - -from .errors import InvalidPythonProcess -from .maps import MemoryMapInformation -from .maps import VirtualMap - -VERSION_REGEXP = re.compile(r"Python (?P\d+)\.(?P\d+).*", re.IGNORECASE) - -BINARY_REGEXP = re.compile(r"python(?P\d+)\.(?P\d+).*", re.IGNORECASE) - -LIBPYTHON_REGEXP = re.compile( - r".*libpython(?P\d+)\.(?P\d+).*", re.IGNORECASE -) - -# Strings like "3.8.10 (default, May 26 2023, 14:05:08)" -# or "2.7.18rc1 (v2.7.18rc1:8d21aa21f2, Apr 20 2020, 13:19:08)" -# or "3.13.0+ experimental free-threading build (Python)" -BSS_VERSION_REGEXP = re.compile( - rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?" - rb"(?: (?:experimental )?free-threading build)? (\(.{1,64}\))" -) - -LOGGER = logging.getLogger(__file__) - - -def scan_process_bss_for_python_version( - pid: int, bss: VirtualMap -) -> Optional[Tuple[int, int]]: - # Lazy import _pystack to overcome a circular-import - # (we really don't want a new extension just for this) :( - try: - from pystack._pystack import copy_memory_from_address - except ImportError: # pragma: no cover - return None - memory = copy_memory_from_address(pid, bss.start, bss.size) - match = BSS_VERSION_REGEXP.findall(memory) - if not match: - return None - ((_, major, minor, patch, *_),) = match - return int(major), int(minor) - - -def scan_core_bss_for_python_version( - corefile: pathlib.Path, bss: VirtualMap -) -> Optional[Tuple[int, int]]: - with open(corefile, "rb") as the_corefile: - the_corefile.seek(bss.offset) - data = the_corefile.read(bss.size) - match = next(BSS_VERSION_REGEXP.finditer(data), None) - if not match: - return None - _, major, minor, patch, *_ = match.groups() - return int(major), int(minor) - - -def _get_python_version_from_map_information( - mapinfo: MemoryMapInformation, -) -> Tuple[int, int]: - match = None - assert mapinfo.python.path is not None - if 
mapinfo.libpython: - assert mapinfo.libpython.path is not None - LOGGER.info( - "Trying to extract version from filename: %s", mapinfo.libpython.path.name - ) - match = LIBPYTHON_REGEXP.match(mapinfo.libpython.path.name) - else: - LOGGER.info( - "Trying to extract version from filename: %s", mapinfo.python.path.name - ) - match = BINARY_REGEXP.match(mapinfo.python.path.name) - if match is None: - LOGGER.info( - "Could not find version by looking at library or binary path: " - "Trying to get it from running python --version" - ) - output = subprocess.check_output( - [mapinfo.python.path, "--version"], text=True, stderr=subprocess.STDOUT - ) - match = VERSION_REGEXP.match(output) - if not match: - raise InvalidPythonProcess( - f"Could not determine python version from {mapinfo.python.path}" - ) - major = match.group("major") - minor = match.group("minor") - LOGGER.info("Python version determined: %s.%s", major, minor) - return int(major), int(minor) - - -def get_python_version_for_process( - pid: int, mapinfo: MemoryMapInformation -) -> Tuple[int, int]: - if mapinfo.bss is not None: - version_from_bss = scan_process_bss_for_python_version(pid, mapinfo.bss) - if version_from_bss is not None: - LOGGER.info( - "Version found by scanning the bss section: %d.%d", *version_from_bss - ) - return version_from_bss - - return _get_python_version_from_map_information(mapinfo) - - -def get_python_version_for_core( - corefile: pathlib.Path, executable: pathlib.Path, mapinfo: MemoryMapInformation -) -> Tuple[int, int]: - if mapinfo.bss is not None: - version_from_bss = scan_core_bss_for_python_version(corefile, mapinfo.bss) - if version_from_bss is not None: - LOGGER.info( - "Version found by scanning the bss section: %d.%d", *version_from_bss - ) - return version_from_bss - return _get_python_version_from_map_information(mapinfo) def is_elf(filename: pathlib.Path) -> bool: - "Return True if the given file is an ELF file" + """Return True if the given file is an ELF file.""" try: 
elf_header = b"\x7fELF" with open(filename, "br") as thefile: @@ -132,23 +18,14 @@ def is_elf(filename: pathlib.Path) -> bool: return False -def get_thread_name(pid: int, tid: int) -> Optional[str]: - try: - with open(f"/proc/{pid}/task/{tid}/comm") as comm: - return comm.read().strip() - except OSError: - return None - - def is_gzip(filename: pathlib.Path) -> bool: - """ - Checks if the given file is a Gzip file based on the header. + """Check if the given file is a Gzip file based on the header. Args: - filename (pathlib.Path): The path to the file to be checked. + filename: The path to the file to be checked. Returns: - bool: True if the file starts with the Gzip header, False otherwise. + True if the file starts with the Gzip header, False otherwise. """ gzip_header = b"\x1f\x8b" with open(filename, "rb") as thefile: @@ -158,7 +35,7 @@ def is_gzip(filename: pathlib.Path) -> bool: def decompress_gzip( filename: pathlib.Path, chunk_size: int = 4 * 1024 * 1024 ) -> pathlib.Path: - """Decompresses a Gzip file and writes the contents to a temporary file. + """Decompress a Gzip file and write the contents to a temporary file. Args: filename: The path to the gzip file to decompress. 
diff --git a/tests/integration/test_gather_stacks.py b/tests/integration/test_gather_stacks.py index 693ab11b..f3a3a8a0 100644 --- a/tests/integration/test_gather_stacks.py +++ b/tests/integration/test_gather_stacks.py @@ -2,17 +2,9 @@ import subprocess import sys from pathlib import Path -from unittest.mock import mock_open -from unittest.mock import patch - -import pytest from pystack.engine import NativeReportingMode -from pystack.engine import StackMethod from pystack.engine import get_process_threads -from pystack.errors import NotEnoughInformation -from pystack.maps import MAPS_REGEXP -from pystack.process import get_thread_name from pystack.types import LocationInfo from pystack.types import NativeFrame from pystack.types import frame_type @@ -313,150 +305,6 @@ def test_multiple_thread_stack_native(python, method, blocking, tmpdir): assert any(frame.path and "?" not in frame.path for frame in eval_frames) -def test_gather_stack_with_heap_fails_if_no_heap(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if match.group("pathname") and "[heap]" in match.group("pathname"): - line = line.replace("[heap]", "[mysterious_segment]") - the_data.append(line) - data = "".join(the_data) - with patch("builtins.open", mock_open(read_data=data)): - # THEN - - with pytest.raises(NotEnoughInformation): - list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.HEAP - ) - ) - - -def test_gather_stack_with_bss_fails_if_no_bss(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if 
not match.group("pathname"): - line = line.replace("\n", "[mysterious_segment]\n") - the_data.append(line) - - data = "".join(the_data) - - with patch("builtins.open", mock_open(read_data=data)), patch( - "pystack.maps._get_bss", return_value=None - ): - # THEN - - with pytest.raises(NotEnoughInformation): - list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.BSS - ) - ) - - -def test_gather_stack_auto_works_if_no_bss(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if not match.group("pathname"): - line = line.replace("\n", "[mysterious_segment]\n") - the_data.append(line) - data = "".join(the_data) - with patch("builtins.open", mock_open(read_data=data)), patch( - "pystack.maps._get_bss", return_value=None - ): - threads = list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.AUTO - ) - ) - - # THEN - - assert len(threads) == 1 - (thread,) = threads - - frames = list(thread.frames) - assert (len(frames)) == 4 - - filenames = {frame.code.filename for frame in frames} - assert filenames == {str(TEST_SINGLE_THREAD_FILE)} - - functions = [frame.code.scope for frame in frames] - assert functions == ["", "first_func", "second_func", "third_func"] - - *line_numbers, last_line = [frame.code.location.lineno for frame in frames] - assert line_numbers == [20, 6, 10] - assert last_line in {16, 17} - - assert not thread.native_frames - - -def test_gather_stack_auto_works_if_no_heap(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if 
match.group("pathname") and "[heap]" in match.group("pathname"): - line = line.replace("[heap]", "[mysterious_segment]") - the_data.append(line) - data = "".join(the_data) - with patch("builtins.open", mock_open(read_data=data)): - threads = list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.AUTO - ) - ) - - # THEN - - assert len(threads) == 1 - (thread,) = threads - - frames = list(thread.frames) - assert (len(frames)) == 4 - - filenames = {frame.code.filename for frame in frames} - assert filenames == {str(TEST_SINGLE_THREAD_FILE)} - - functions = [frame.code.scope for frame in frames] - assert functions == ["", "first_func", "second_func", "third_func"] - - *line_numbers, last_line = [frame.code.location.lineno for frame in frames] - assert line_numbers == [20, 6, 10] - assert last_line in {16, 17} - - assert not thread.native_frames - - @ALL_PYTHONS def test_thread_registered_with_python_but_with_no_python_calls(python, tmpdir): # GIVEN @@ -584,14 +432,6 @@ def test_get_thread_name(tmpdir): assert "thread_foo" in {thread.name for thread in threads} -def test_get_thread_name_oserror(): - # WHEN - thread_name = get_thread_name(pid=0, tid=0) - - # THEN - assert thread_name is None - - @ALL_PYTHONS def test_inlined_python_calls(python, tmpdir): # GIVEN diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index a6518615..190c4443 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -1,72 +1,20 @@ import sys +import threading +from concurrent.futures import ThreadPoolExecutor from pathlib import Path import pytest from pystack._pystack import ProcessManager -from pystack.engine import CoreFileAnalyzer from pystack.engine import get_process_threads -from pystack.errors import EngineError -from pystack.maps import generate_maps_for_process -from pystack.maps import parse_maps_file -from pystack.maps import parse_maps_file_for_binary from pystack.process import is_elf 
-from pystack.process import scan_core_bss_for_python_version -from pystack.process import scan_process_bss_for_python_version from tests.utils import ALL_PYTHONS -from tests.utils import generate_core_file from tests.utils import spawn_child_process TEST_SINGLE_THREAD_FILE = Path(__file__).parent / "single_thread_program.py" TEST_SHUTDOWN_FILE = Path(__file__).parent / "shutdown_program.py" -@ALL_PYTHONS -def test_remote_version_detection_using_bss_section(python, tmpdir): - # GIVEN - - (expected_major, expected_minor), python_executable = python - - # WHEN - - with spawn_child_process( - python_executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - all_maps = generate_maps_for_process(child_process.pid) - maps = parse_maps_file(child_process.pid, all_maps) - major, minor = scan_process_bss_for_python_version(child_process.pid, maps.bss) - - # THEN - - assert major == expected_major - assert minor == expected_minor - - -@ALL_PYTHONS -def test_core_version_detection_using_bss_section(python, tmpdir): - # GIVEN - - (expected_major, expected_minor), python_executable = python - - # WHEN - - with generate_core_file( - python_executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as corefile: - core_map_analyzer = CoreFileAnalyzer(str(corefile), str(python_executable)) - virtual_maps = tuple(core_map_analyzer.extract_maps()) - load_point_by_module = core_map_analyzer.extract_module_load_points() - maps = parse_maps_file_for_binary( - python_executable, virtual_maps, load_point_by_module - ) - major, minor = scan_core_bss_for_python_version(corefile, maps.bss) - - # THEN - - assert major == expected_major - assert minor == expected_minor - - @ALL_PYTHONS def test_detection_of_interpreter_shutdown(python, tmpdir): # GIVEN @@ -120,11 +68,29 @@ def test_reattaching_to_already_traced_process(python, tmpdir): pid = child_process.pid # WHEN / THEN - with pytest.raises(EngineError, match="Operation not permitted"): - it1 = iter(get_process_threads(pid, 
stop_process=True)) - it2 = iter(get_process_threads(pid, stop_process=True)) - next(it1) - next(it2) + # Use threading to create overlapping attachment attempts. + # The first thread holds the ptrace attachment while the second tries to attach. + barrier = threading.Barrier(2) + results = [] + errors = [] + + def attach_thread(): + try: + barrier.wait(timeout=5) # Synchronize start + threads = list(get_process_threads(pid, stop_process=True)) + results.append(len(threads)) + except Exception as e: + errors.append(str(e)) + + with ThreadPoolExecutor(max_workers=2) as executor: + f1 = executor.submit(attach_thread) + f2 = executor.submit(attach_thread) + f1.result(timeout=10) + f2.result(timeout=10) + + # One should succeed, one should fail with "Operation not permitted" + assert len(results) + len(errors) == 2 + assert any("Operation not permitted" in err for err in errors) @pytest.mark.parametrize( diff --git a/tests/unit/test_maps.py b/tests/unit/test_maps.py deleted file mode 100644 index 4f829749..00000000 --- a/tests/unit/test_maps.py +++ /dev/null @@ -1,1267 +0,0 @@ -from pathlib import Path -from unittest.mock import mock_open -from unittest.mock import patch - -import pytest - -from pystack.errors import MissingExecutableMaps -from pystack.errors import ProcessNotFound -from pystack.errors import PystackError -from pystack.maps import VirtualMap -from pystack.maps import _get_base_map -from pystack.maps import _get_bss -from pystack.maps import generate_maps_for_process -from pystack.maps import parse_maps_file_for_binary - - -def test_virtual_map(): - # GIVEN - - map = VirtualMap( - start=0, - end=10, - offset=1234, - device="device", - flags="xrwp", - inode=42, - path=None, - filesize=10, - ) - - # WHEN / THEN - - assert map.contains(5) - assert not map.contains(15) - assert map.is_private() - assert map.is_executable() - assert map.is_readable() - assert map.is_writable() - - -def test_simple_maps_no_such_pid(): - # GIVEN - - with patch("builtins.open", 
side_effect=FileNotFoundError()): - # WHEN / THEN - with pytest.raises(ProcessNotFound): - list(generate_maps_for_process(1)) - - -def test_simple_maps(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 08:12 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_maps_with_long_device_numbers(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 0123:4567 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="0123:4567", - flags="r--p", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_anonymous_maps(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 08:12 8398159 - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ), - ] - - -def test_map_permissions(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--- 00000000 08:12 8398159 /usr/lib/libc-2.31.so -7f1ac1e2b000-7f1ac1e50000 rw-- 00000000 08:12 8398159 /usr/lib/libc-2.31.so -7f1ac1e2b000-7f1ac1e50000 rwx- 00000000 08:12 8398159 /usr/lib/libc-2.31.so -7f1ac1e2b000-7f1ac1e50000 rwxp 00000000 08:12 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", 
mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r---", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="rw--", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="rwx-", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="rwxp", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_unexpected_line_is_ignored(): - # GIVEN - - map_text = """ -I am an unexpected line -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 08:12 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_special_maps(): - # GIVEN - - map_text = """ -555f1ab1c000-555f1ab3d000 rw-p 00000000 00:00 0 [heap] -7ffdf8102000-7ffdf8124000 rw-p 00000000 00:00 0 [stack] -7ffdf8152000-7ffdf8155000 r--p 00000000 00:00 0 [vvar] -7ffdf8155000-7ffdf8156000 r-xp 00000000 00:00 0 [vdso] -ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall] - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=93866958110720, - end=93866958245888, - filesize=135168, - offset=0, - 
device="00:00", - flags="rw-p", - inode=0, - path=Path("[heap]"), - ), - VirtualMap( - start=140728765259776, - end=140728765399040, - filesize=139264, - offset=0, - device="00:00", - flags="rw-p", - inode=0, - path=Path("[stack]"), - ), - VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=12288, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("[vvar]"), - ), - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("[vdso]"), - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("[vsyscall]"), - ), - ] - - -def test_maps_for_binary_only_python_exec(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_with_heap(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - heap = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=12288, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("[heap]"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - 
inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - heap, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert mapinfo.bss is None - assert mapinfo.heap == heap - - -def test_maps_for_binary_with_libpython(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_executable_with_bss(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - maps = [ - python, - bss, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert 
mapinfo.bss == bss - assert mapinfo.heap is None - - -def test_maps_for_binary_libpython_with_bss(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - - maps = [ - python, - bss, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - libpython_bss, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss == libpython_bss - assert mapinfo.heap is None - - -def test_maps_for_binary_libpython_without_bss(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - maps = [ - python, - bss, - VirtualMap( - 
start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_libpython_with_bss_with_non_readable_segment(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - - maps = [ - python, - bss, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - VirtualMap( - start=1844674407369906, - end=18446744073699069, - filesize=4096, - offset=0, - device="00:00", - flags="---p", - inode=0, - path=None, - ), - libpython_bss, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss == libpython_bss - assert mapinfo.heap is None - - -def test_maps_for_binary_range(): - # GIVEN - - maps = [ - VirtualMap( - start=1, - end=2, - filesize=1, - 
offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ), - VirtualMap( - start=2, - end=3, - filesize=1, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ), - VirtualMap( - start=5, - end=6, - filesize=1, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ), - VirtualMap( - start=8, - end=9, - filesize=1, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=None, - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.memory.min_addr == 1 - assert mapinfo.memory.max_addr == 9 - - -def test_maps_for_binary_range_vmaps_are_ignored(): - # GIVEN - - maps = [ - VirtualMap( - start=1, - end=2, - filesize=1, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ), - VirtualMap( - start=2000, - end=3000, - filesize=1000, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=Path("[vsso]"), - ), - VirtualMap( - start=5, - end=6, - filesize=1, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("[vsyscall]"), - ), - VirtualMap( - start=8, - end=9, - filesize=1, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("[vvar]"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.memory.min_addr == 1 - assert mapinfo.memory.max_addr == 2 - - -def test_maps_for_binary_no_binary_map(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN / THEN - - with 
pytest.raises(MissingExecutableMaps): - parse_maps_file_for_binary(Path("another_executable"), maps) - - -def test_maps_for_binary_no_executable_segment(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_multiple_libpythons(): - # GIVEN - - maps = [ - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("the_executable"), - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libpython3.8.so"), - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libpython2.7.so"), - ), - ] - - # WHEN / THEN - - with pytest.raises(PystackError): - parse_maps_file_for_binary(Path("the_executable"), maps) - - -def test_maps_for_binary_invalid_executable(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - 
- # WHEN - - with pytest.raises(MissingExecutableMaps, match="the_executable"): - parse_maps_file_for_binary(Path("other_executable"), maps) - - -def test_maps_for_binary_invalid_executable_and_no_available_maps(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - with pytest.raises( - MissingExecutableMaps, match="There are no available executable maps" - ): - parse_maps_file_for_binary(Path("other_executable"), maps) - - -def test_maps_with_scattered_segments(): - map_text = """ -00400000-00401000 r-xp 00000000 fd:00 67488961 /bin/python3.9-dbg -00600000-00601000 r--p 00000000 fd:00 67488961 /bin/python3.9-dbg -00601000-00602000 rw-p 00001000 fd:00 67488961 /bin/python3.9-dbg -0067b000-00a58000 rw-p 00000000 00:00 0 [heap] -7f7b38000000-7f7b38028000 rw-p 00000000 00:00 0 -7f7b38028000-7f7b3c000000 ---p 00000000 00:00 0 -7f7b40000000-7f7b40021000 rw-p 00000000 00:00 0 -7f7b40021000-7f7b44000000 ---p 00000000 00:00 0 -7f7b44ec0000-7f7b44f40000 rw-p 00000000 00:00 0 -f7b45a61000-7f7b45d93000 rw-p 00000000 00:00 0 -7f7b46014000-7f7b46484000 r--p 0050b000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b46484000-7f7b46485000 ---p 00000000 00:00 0 -7f7b46485000-7f7b46cda000 rw-p 00000000 00:00 0 -7f7b46cda000-7f7b46d16000 r--p 00a3d000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b46d16000-7f7b46d6f000 rw-p 00000000 00:00 0 -7f7b46d6f000-7f7b46d92000 r--p 00001000 fd:00 67488961 /bin/python3.9-dbg -7f7b46d92000-7f7b46d93000 ---p 00000000 00:00 0 -7f7b46d93000-7f7b475d3000 rw-p 00000000 00:00 0 -7f7b498c1000-7f7b49928000 r-xp 00000000 fd:00 7023 /lib64/libssl.so.1.0.0 -7f7b49928000-7f7b49b28000 ---p 00067000 fd:00 7023 
/lib64/libssl.so.1.0.0 -f7b4c632000-7f7b4c6f3000 rw-p 00000000 00:00 0 -7f7b4c6f3000-7f7b4c711000 rw-p 00000000 00:00 0 -7f7b4c711000-7f7b4c712000 r--p 0002a000 fd:00 67488961 /bin/python3.9-dbg -7f7b4c712000-7f7b4c897000 rw-p 00000000 00:00 0 -7f7b5a356000-7f7b5a35d000 r--s 00000000 fd:00 201509519 /usr/lib64/gconv/gconv-modules.cache -7f7b5a35d000-7f7b5a827000 r-xp 00000000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5a827000-7f7b5aa27000 ---p 004ca000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5aa27000-7f7b5aa2c000 r--p 004ca000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5aa2c000-7f7b5aa67000 rw-p 004cf000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5aa67000-7f7b5aa8b000 rw-p 00000000 00:00 0 -7fff26f8e000-7fff27020000 rw-p 00000000 00:00 0 [stack] -7fff27102000-7fff27106000 r--p 00000000 00:00 0 [vvar] -7fff27106000-7fff27108000 r-xp 00000000 00:00 0 [vdso] -ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - mapinfo = parse_maps_file_for_binary(Path("/bin/python3.9-dbg"), maps) - - # THEN - - assert mapinfo.python == VirtualMap( - start=0x400000, - end=0x401000, - filesize=4096, - offset=0, - device="fd:00", - flags="r-xp", - inode=67488961, - path=Path("/bin/python3.9-dbg"), - ) - assert mapinfo.libpython == VirtualMap( - start=0x7F7B46014000, - end=0x7F7B46484000, - filesize=4653056, - offset=5287936, - device="fd:00", - flags="r--p", - inode=1059871, - path=Path("/lib64/libpython3.9d.so.1.0"), - ) - assert mapinfo.bss == VirtualMap( - start=140167436849152, - end=140167445585920, - filesize=8736768, - offset=0, - device="00:00", - flags="rw-p", - inode=0, - path=None, - ) - assert mapinfo.heap == VirtualMap( - start=0x0067B000, - end=0x00A58000, - filesize=4050944, - offset=0, - device="00:00", - flags="rw-p", - inode=0, - path=Path("[heap]"), - ) - - -def test_get_base_map_path_existing(): - # GIVEN - 
maps = [ - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - base_map = _get_base_map(maps) - - # THEN - assert base_map == maps[1] - - -def test_get_base_map_path_not_existing(): - # GIVEN - maps = [ - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=None, - ), - ] - - # WHEN - base_map = _get_base_map(maps) - - # THEN - assert base_map == maps[0] - - -def test_get_bss_base_map_no_path(): - # GIVEN - map_no_path = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=None, - ) - - # WHEN - with patch("pystack.maps._get_base_map", return_value=map_no_path): - bss = _get_bss("elf_maps", "load_point") - - # THEN - assert bss is None - - -def test_get_bss_no_matching_map(): - # GIVEN - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - maps = [libpython, libpython_bss] - - # WHEN - with patch("pystack._pystack.get_bss_info") as mock_get_bss_info: - mock_get_bss_info.return_value = {"corrected_addr": 100000000} - bss = _get_bss(maps, libpython.start) - - # THEN - assert bss is None - - -def 
test_get_bss_found_matching_map(): - # GIVEN - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - maps = [libpython, libpython_bss] - - # WHEN - with patch("pystack._pystack.get_bss_info") as mock_get_bss_info: - mock_get_bss_info.return_value = { - "corrected_addr": libpython_bss.start - libpython.start, - "size": libpython_bss.filesize, - } - bss = _get_bss(maps, libpython.start) - - # THEN - assert bss == VirtualMap( - start=libpython_bss.start, - end=libpython_bss.end, - filesize=libpython_bss.filesize, - offset=libpython_bss.offset, - device="", - flags="", - inode=0, - path=None, - ) diff --git a/tests/unit/test_process.py b/tests/unit/test_process.py deleted file mode 100644 index f33ae70e..00000000 --- a/tests/unit/test_process.py +++ /dev/null @@ -1,480 +0,0 @@ -from unittest.mock import Mock -from unittest.mock import mock_open -from unittest.mock import patch - -import pytest - -from pystack.errors import InvalidPythonProcess -from pystack.maps import VirtualMap -from pystack.process import BINARY_REGEXP -from pystack.process import LIBPYTHON_REGEXP -from pystack.process import VERSION_REGEXP -from pystack.process import get_python_version_for_core -from pystack.process import get_python_version_for_process -from pystack.process import scan_core_bss_for_python_version -from pystack.process import scan_process_bss_for_python_version - - -@pytest.mark.parametrize( - "text, version", - [ - ("libpython3.8.so", (3, 8)), - ("libpython3.5.12.so", (3, 5)), - ("libpython3.8m.so", (3, 8)), - ("libpython3.8d.so", (3, 8)), - ("libpython3.8dm.so", (3, 8)), - ("libpython2.7.so.1", (2, 7)), - ("libpython2.7.so.1.0", (2, 7)), - ("LIBPYTHON3.8.so", (3, 
8)), - ("LiBpYtHoN3.6.so", (3, 6)), - ], -) -def test_libpython_detection(text, version): - # GIVEN / WHEN - result = LIBPYTHON_REGEXP.match(text) - - # THEN - assert result - - major, minor = version - assert int(result.group("major")) == major - assert int(result.group("minor")) == minor - - -@pytest.mark.parametrize( - "text", ["libpython.so", "libpython.so.1.0", "libpythondm.so.1.0"] -) -def test_libpython_false_cases(text): - # GIVEN / WHEN - result = LIBPYTHON_REGEXP.match(text) - - # THEN - assert result is None - - -@pytest.mark.parametrize( - "text, version", - [ - ("python3.8", (3, 8)), - ("python3.5.1.2", (3, 5)), - ("python2.7.exe", (2, 7)), - ("Python3.6", (3, 6)), - ("PyThOn3.5", (3, 5)), - ], -) -def test_executable_detection(text, version): - # GIVEN / WHEN - result = BINARY_REGEXP.match(text) - - # THEN - assert result - - major, minor = version - assert int(result.group("major")) == major - assert int(result.group("minor")) == minor - - -@pytest.mark.parametrize("text", ["cat3.8", "python3", "python2"]) -def test_executable_false_cases(text): - # GIVEN / WHEN - result = BINARY_REGEXP.match(text) - - # THEN - assert result is None - - -@pytest.mark.parametrize( - "text, version", - [ - ("Python 3.8.2", (3, 8)), - ("Python 3.8.1rc2", (3, 8)), - ("Python 3.9.0b4", (3, 9)), - ("Python 2.7.16", (2, 7)), - ], -) -def test_version_detection(text, version): - # GIVEN / WHEN - result = VERSION_REGEXP.match(text) - - # THEN - assert result - - major, minor = version - assert int(result.group("major")) == major - assert int(result.group("minor")) == minor - - -def test_get_python_version_for_process_fallback_bss(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - 
scan_bss_mock.return_value = (3, 8) - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_process_fallback_libpython_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - libpython_regexp_mock.match.return_value = match - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_process_fallback_binary_regexp(): - # GIVEN - mapinfo = Mock() - mapinfo.libpython = None - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - binary_regexp_mock.match.return_value = match - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.match.assert_called_once() - 
subprocess_mock.assert_not_called() - - -def test_get_python_version_for_process_fallback_version_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "Python 3.8.3" - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.match.asser_not_called() - subprocess_mock.assert_called_once() - - -def test_get_python_version_for_process_fallback_failure(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ), patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "" - # THEN - with pytest.raises(InvalidPythonProcess): - get_python_version_for_process(0, mapinfo) - - -def test_get_python_version_for_core_fallback_bss(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = (3, 8) - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) 
- scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_no_bss(): - # GIVEN - mapinfo = Mock() - mapinfo.bss = None - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - match = Mock() - match.group.side_effect = [3, 8] - libpython_regexp_mock.match.return_value = match - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_not_called() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_libpython_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - libpython_regexp_mock.match.return_value = match - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_binary_regexp(): - # GIVEN - mapinfo = Mock() - mapinfo.libpython = None - - # WHEN - with patch( - 
"pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - binary_regexp_mock.match.return_value = match - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.match.assert_called_once() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_version_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "Python 3.8.3" - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.match.asser_not_called() - subprocess_mock.assert_called_once() - - -def test_get_python_version_for_core_fallback_falure(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ), patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - 
libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "" - # THEN - with pytest.raises(InvalidPythonProcess): - get_python_version_for_core("corefile", "executable", mapinfo) - - -def test_scan_process_bss_for_python_version(): - # GIVEM - - memory = ( - b"garbagegarbagePython 3.8.3 (default, May 22 2020, 23:30:25)garbagegarbage" - ) - bss = Mock() - # WHEN - - with patch("pystack._pystack.copy_memory_from_address", return_value=memory): - major, minor = scan_process_bss_for_python_version(0, bss) - - # THEN - - assert major == 3 - assert minor == 8 - - -def test_scan_process_bss_for_python_version_failure(): - # GIVEM - - memory = b"garbagegarbagegarbagegarbage" - bss = Mock() - # WHEN - - with patch("pystack._pystack.copy_memory_from_address", return_value=memory): - result = scan_process_bss_for_python_version(0, bss) - - # THEN - - assert result is None - - -def test_scan_core_bss_for_python_version(): - # GIVEM - - memory = ( - b"garbagegarbagePython 3.8.3 (default, May 22 2020, 23:30:25)garbagegarbage" - ) - bss = VirtualMap( - start=0, - end=len(memory), - filesize=len(memory), - offset=0, - flags="", - inode=0, - device="", - path=None, - ) - # WHEN - - with patch("builtins.open", mock_open(read_data=memory)): - major, minor = scan_core_bss_for_python_version("corefile", bss) - - # THEN - - assert major == 3 - assert minor == 8 - - -def test_scan_core_bss_for_python_version_failure(): - # GIVEM - - memory = b"garbagegarbagegarbagegarbage" - bss = VirtualMap( - start=0, - end=len(memory), - filesize=len(memory), - offset=0, - flags="", - inode=0, - device="", - path=None, - ) - # WHEN - - with patch("builtins.open", mock_open(read_data=memory)): - result = scan_core_bss_for_python_version("corefile", bss) - - # THEN - - assert result is None