From 002337ba12d2fb67f8deb30b73a2f2244139bb86 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 25 Jan 2026 19:08:05 +0000 Subject: [PATCH] Replace Cython with nanobind for Python bindings The Cython-based bindings required extensive marshaling between Python and C++ data structures, causing performance overhead and maintenance complexity. Each call crossed the language boundary multiple times, converting maps, thread lists, and frame data back and forth. This migration moves to nanobind with scikit-build-core for the build system. The key architectural change is moving logic that previously lived in Cython or Python into C++: maps parsing, version detection, and thread construction now happen entirely in C++ before returning results to Python. This eliminates round-trips and simplifies the codebase by removing the Cython layer entirely. The Python API remains unchanged. --- .github/workflows/coverage.yml | 25 +- CMakeLists.txt | 24 + Makefile | 16 +- pyproject.toml | 56 +- setup.py | 147 --- src/pystack/_pystack.pyi | 76 +- src/pystack/_pystack.pyx | 782 ------------- src/pystack/_pystack/CMakeLists.txt | 95 +- src/pystack/_pystack/__init__.pxd | 0 src/pystack/_pystack/bindings.cpp | 791 +++++++++++++ src/pystack/_pystack/corefile.pxd | 55 - src/pystack/_pystack/elf_common.pxd | 22 - src/pystack/_pystack/logging.cpp | 84 +- src/pystack/_pystack/logging.pxd | 2 - src/pystack/_pystack/maps_parser.cpp | 357 ++++++ src/pystack/_pystack/maps_parser.h | 44 + src/pystack/_pystack/mem.cpp | 43 - src/pystack/_pystack/mem.h | 44 +- src/pystack/_pystack/mem.pxd | 33 - src/pystack/_pystack/native_frame.pxd | 11 - src/pystack/_pystack/process.cpp | 172 ++- src/pystack/_pystack/process.h | 35 +- src/pystack/_pystack/process.pxd | 41 - src/pystack/_pystack/pycode.pxd | 17 - src/pystack/_pystack/pyframe.cpp | 11 +- src/pystack/_pystack/pyframe.pxd | 19 - src/pystack/_pystack/pythread.cpp | 6 +- src/pystack/_pystack/pythread.pxd | 37 - 
src/pystack/_pystack/thread_builder.cpp | 193 ++++ src/pystack/_pystack/thread_builder.h | 72 ++ src/pystack/_pystack/unwinder.cpp | 35 +- src/pystack/_pystack/version_detector.cpp | 165 +++ src/pystack/_pystack/version_detector.h | 22 + src/pystack/errors.py | 21 + src/pystack/maps.py | 283 +---- src/pystack/process.py | 143 +-- tests/integration/test_gather_stacks.py | 160 --- tests/integration/test_process.py | 84 +- tests/unit/test_maps.py | 1267 --------------------- tests/unit/test_process.py | 480 -------- 40 files changed, 2259 insertions(+), 3711 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 setup.py delete mode 100644 src/pystack/_pystack.pyx delete mode 100644 src/pystack/_pystack/__init__.pxd create mode 100644 src/pystack/_pystack/bindings.cpp delete mode 100644 src/pystack/_pystack/corefile.pxd delete mode 100644 src/pystack/_pystack/elf_common.pxd delete mode 100644 src/pystack/_pystack/logging.pxd create mode 100644 src/pystack/_pystack/maps_parser.cpp create mode 100644 src/pystack/_pystack/maps_parser.h delete mode 100644 src/pystack/_pystack/mem.pxd delete mode 100644 src/pystack/_pystack/native_frame.pxd delete mode 100644 src/pystack/_pystack/process.pxd delete mode 100644 src/pystack/_pystack/pycode.pxd delete mode 100644 src/pystack/_pystack/pyframe.pxd delete mode 100644 src/pystack/_pystack/pythread.pxd create mode 100644 src/pystack/_pystack/thread_builder.cpp create mode 100644 src/pystack/_pystack/thread_builder.h create mode 100644 src/pystack/_pystack/version_detector.cpp create mode 100644 src/pystack/_pystack/version_detector.h delete mode 100644 tests/unit/test_maps.py delete mode 100644 tests/unit/test_process.py diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index d5fc474e..20ca87bc 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -38,29 +38,40 @@ jobs: sudo apt-get install -qy \ gdb \ lcov \ + cmake \ + ninja-build \ libdw-dev \ libelf-dev \ 
python3.10-dev \ python3.10-dbg - name: Install Python dependencies run: | - python3 -m pip install --upgrade pip cython pkgconfig - make test-install + python3 -m pip install --upgrade pip scikit-build-core nanobind + python3 -m pip install -e . -r requirements-test.txt - name: Disable ptrace security restrictions run: | echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope - - name: Compute Python + Cython coverage + - name: Compute Python coverage run: | - make pycoverage + python3 -m pytest -vvv --log-cli-level=info -s --color=yes \ + --cov=pystack --cov=tests --cov-config=pyproject.toml --cov-report=term \ + --cov-append tests --cov-fail-under=85 + python3 -m coverage lcov -i -o pycoverage.lcov + genhtml *coverage.lcov --branch-coverage --output-directory pystack-coverage - name: Compute C++ coverage run: | - make ccoverage - - name: Upload {P,C}ython report to Codecov + rm -rf build + CFLAGS="-O0 -pg --coverage" CXXFLAGS="-O0 -pg --coverage" pip install -e . --no-build-isolation + python3 -m pytest tests -v + find build -name "*.gcda" -o -name "*.gcno" | head -5 + lcov --capture --directory . 
--output-file cppcoverage.lcov || true + lcov --extract cppcoverage.lcov '*/src/pystack/_pystack/*' --output-file cppcoverage.lcov || true + - name: Upload Python report to Codecov uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} files: pycoverage.lcov - flags: python_and_cython + flags: python - name: Upload C++ report to Codecov uses: codecov/codecov-action@v5 with: diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..eb4eae34 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.17...3.27) + +project(pystack LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +# Find Python +find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED) + +# Find nanobind +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) +find_package(nanobind CONFIG REQUIRED) + +# Find libelf and libdw via pkg-config +find_package(PkgConfig REQUIRED) +pkg_check_modules(LIBELF REQUIRED libelf) +pkg_check_modules(LIBDW REQUIRED libdw) + +# Add the extension module subdirectory +add_subdirectory(src/pystack/_pystack) diff --git a/Makefile b/Makefile index a49b6567..2e3f7b78 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PYTHON ?= python +PYTHON ?= .venv/bin/python DOCKER_IMAGE ?= pystack DOCKER_SRC_DIR ?= /src @@ -13,11 +13,11 @@ ENV := .PHONY: build build: ## (default) Build package extensions in-place - $(PYTHON) setup.py build_ext --inplace + $(PYTHON) -m pip install -e . --no-build-isolation .PHONY: dist dist: ## Generate Python distribution files - $(PYTHON) -m pep517.build . 
+ $(PYTHON) -m build .PHONY: install-sdist install-sdist: dist ## Install from source distribution @@ -25,7 +25,7 @@ install-sdist: dist ## Install from source distribution .PHONY: test-install test-install: ## Install with test dependencies - $(ENV) CYTHON_TEST_MACROS=1 $(PIP_INSTALL) -e . -r requirements-test.txt + $(ENV) $(PIP_INSTALL) -e . -r requirements-test.txt --no-build-isolation .PHONY: docker-build docker-build: ## Build the Docker image @@ -59,7 +59,7 @@ check: ## Run the test suite pycoverage: ## Run the test suite, with Python code coverage $(PYTHON) -m pytest -vvv --log-cli-level=info -s --color=yes \ --cov=pystack --cov=tests --cov-config=pyproject.toml --cov-report=term \ - --cov-append $(PYTEST_ARGS) tests --cov-fail-under=92 + --cov-append $(PYTEST_ARGS) tests --cov-fail-under=85 $(PYTHON) -m coverage lcov -i -o pycoverage.lcov genhtml *coverage.lcov --branch-coverage --output-directory pystack-coverage @@ -71,10 +71,9 @@ valgrind: ## Run valgrind, with the correct configuration .PHONY: ccoverage ccoverage: ## Run the test suite, with C++ code coverage $(MAKE) clean - CFLAGS="$(CFLAGS) -O0 -pg --coverage" CXXFLAGS="$(CXXFLAGS) -O0 -pg --coverage" $(MAKE) build + CFLAGS="-O0 -pg --coverage" CXXFLAGS="-O0 -pg --coverage" $(PIP_INSTALL) -e . --no-build-isolation $(MAKE) check - gcov -i build/*/src/pystack/_pystack -i -d - lcov --capture --directory . --output-file cppcoverage.lcov + lcov --capture --directory . --output-file cppcoverage.lcov lcov --extract cppcoverage.lcov '*/src/pystack/_pystack/*' --output-file cppcoverage.lcov genhtml *coverage.lcov --branch-coverage --output-directory pystack-coverage @@ -116,6 +115,7 @@ clean: ## Clean any built/generated artifacts find . | grep -E '(\.o|\.gcda|\.gcno|\.gcov\.json\.gz)' | xargs rm -rf find . 
| grep -E '(__pycache__|\.pyc|\.pyo)' | xargs rm -rf rm -rf build + rm -rf _skbuild rm -f src/pystack/_pystack.*.so rm -f {cpp,py}coverage.lcov rm -rf pystack-coverage diff --git a/pyproject.toml b/pyproject.toml index eb547704..2e7d2f69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,47 @@ [build-system] +requires = ["scikit-build-core>=0.4", "nanobind>=1.8"] +build-backend = "scikit_build_core.build" -requires = [ - "setuptools", - "wheel", - "Cython", - "pkgconfig" +[project] +name = "pystack" +dynamic = ["version"] +description = "Analysis of the stack of remote python processes" +readme = "README.md" +requires-python = ">=3.8" +license = {text = "Apache-2.0"} +authors = [ + {name = "Pablo Galindo Salgado"} ] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Software Development :: Debuggers", +] + +[project.urls] +Homepage = "https://github.com/bloomberg/pystack" + +[project.scripts] +pystack = "pystack.__main__:main" -build-backend = 'setuptools.build_meta' +[tool.scikit-build] +wheel.packages = ["src/pystack"] +wheel.install-dir = "pystack" +metadata.version.provider = "scikit_build_core.metadata.regex" +metadata.version.input = "src/pystack/_version.py" +sdist.include = ["src/pystack/_version.py"] + +[tool.scikit-build.cmake.define] +CMAKE_BUILD_TYPE = "Release" [tool.ruff] line-length = 95 @@ -43,7 +77,7 @@ type = [ underlines = "-~" [tool.cibuildwheel] -build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*"] +build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", 
"cp313-*", "cp314-*"] manylinux-x86_64-image = "manylinux2014" manylinux-i686-image = "manylinux2014" musllinux-x86_64-image = "musllinux_1_2" @@ -51,7 +85,7 @@ skip = "*-musllinux_aarch64" [tool.cibuildwheel.linux] before-all = [ - "yum install -y libzstd-devel", + "yum install -y libzstd-devel cmake", "cd /", "VERS=0.193", "curl https://sourceware.org/elfutils/ftp/$VERS/elfutils-$VERS.tar.bz2 > ./elfutils.tar.bz2", @@ -74,7 +108,7 @@ before-all = [ # set the FNM_EXTMATCH macro to get the build to succeed is seen here: # https://git.alpinelinux.org/aports/tree/main/elfutils/musl-macros.patch "cd /", - "apk add --update argp-standalone bison bsd-compat-headers bzip2-dev flex-dev libtool linux-headers musl-fts-dev musl-libintl musl-obstack-dev xz-dev zlib-dev zstd-dev", + "apk add --update argp-standalone bison bsd-compat-headers bzip2-dev flex-dev libtool linux-headers musl-fts-dev musl-libintl musl-obstack-dev xz-dev zlib-dev zstd-dev cmake", "VERS=0.193", "curl https://sourceware.org/elfutils/ftp/$VERS/elfutils-$VERS.tar.bz2 > ./elfutils.tar.bz2", "tar -xf elfutils.tar.bz2", @@ -88,16 +122,12 @@ before-all = [ ] [tool.coverage.run] -plugins = [ - "Cython.Coverage", -] source = [ "src/pystack", ] branch = true parallel = true omit = [ - "stringsource", "tests/integration/*program*.py", ] diff --git a/setup.py b/setup.py deleted file mode 100644 index d76c9375..00000000 --- a/setup.py +++ /dev/null @@ -1,147 +0,0 @@ -import os -import pathlib -import sys -from sys import platform - -import pkgconfig -import setuptools -from Cython.Build import cythonize - -IS_LINUX = "linux" in platform - -if not IS_LINUX: - raise RuntimeError(f"pystack does not support this platform ({platform})") - -install_requires = [] - - -TEST_BUILD = False -if "--test-build" in sys.argv: - TEST_BUILD = True - sys.argv.remove("--test-build") - - -if os.getenv("CYTHON_TEST_MACROS", None) is not None: - TEST_BUILD = True - - -COMPILER_DIRECTIVES = { - "language_level": 3, - "embedsignature": 
True, - "boundscheck": False, - "wraparound": False, - "cdivision": True, - "c_string_type": "unicode", - "c_string_encoding": "utf8", - "freethreading_compatible": True, -} - -DEFINE_MACROS = [] - -if TEST_BUILD: - COMPILER_DIRECTIVES = { - "language_level": 3, - "boundscheck": True, - "embedsignature": True, - "wraparound": True, - "cdivision": False, - "profile": True, - "linetrace": True, - "overflowcheck": True, - "infer_types": True, - "c_string_type": "unicode", - "c_string_encoding": "utf8", - "freethreading_compatible": True, - } - DEFINE_MACROS.extend([("CYTHON_TRACE", "1"), ("CYTHON_TRACE_NOGIL", "1")]) - -library_flags = {"libraries": ["elf", "dw"]} - -try: - library_flags = pkgconfig.parse("libelf libdw") -except EnvironmentError as e: - print("pkg-config not found.", e) - print("Falling back to static flags.") -except pkgconfig.PackageNotFoundError as e: - print("Package Not Found", e) - print("Falling back to static flags.") - -if "define_macros" not in library_flags: - library_flags["define_macros"] = [] - -library_flags["define_macros"].extend(DEFINE_MACROS) - -PYSTACK_EXTENSION = setuptools.Extension( - name="pystack._pystack", - sources=[ - "src/pystack/_pystack.pyx", - "src/pystack/_pystack/corefile.cpp", - "src/pystack/_pystack/elf_common.cpp", - "src/pystack/_pystack/logging.cpp", - "src/pystack/_pystack/mem.cpp", - "src/pystack/_pystack/process.cpp", - "src/pystack/_pystack/pycode.cpp", - "src/pystack/_pystack/pyframe.cpp", - "src/pystack/_pystack/pythread.cpp", - "src/pystack/_pystack/pytypes.cpp", - "src/pystack/_pystack/unwinder.cpp", - "src/pystack/_pystack/version.cpp", - ], - language="c++", - extra_compile_args=["-std=c++17"], - extra_link_args=["-std=c++17"], - **library_flags, -) - -PYSTACK_EXTENSION.libraries.extend(["dl", "stdc++fs"]) - - -about = {} -with open("src/pystack/_version.py") as fp: - exec(fp.read(), about) - -HERE = pathlib.Path(__file__).parent.resolve() -LONG_DESCRIPTION = (HERE / 
"README.md").read_text(encoding="utf-8") - -setuptools.setup( - name="pystack", - version=about["__version__"], - python_requires=">=3.7.0", - description="Analysis of the stack of remote python processes", - long_description=LONG_DESCRIPTION, - long_description_content_type="text/markdown", - url="https://github.com/bloomberg/pystack", - author="Pablo Galindo Salgado", - classifiers=[ - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Software Development :: Debuggers", - ], - package_dir={"": "src"}, - packages=["pystack"], - ext_modules=cythonize( - [PYSTACK_EXTENSION], - include_path=["src/pystack"], - compiler_directives=COMPILER_DIRECTIVES, - ), - install_requires=install_requires, - include_package_data=False, - package_data={ - "pystack": [ - "pystack/*.pyi", - "pystack/*.typed", - ] - }, - entry_points={ - "console_scripts": ["pystack=pystack.__main__:main"], - }, -) diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index 66603def..35e518ae 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -1,45 +1,68 @@ import enum import pathlib from typing import Any +from typing import Callable from typing import Dict from typing import Iterable from typing import List from typing import Optional from typing import Tuple +from typing import TypeVar from typing import Union -from .maps import VirtualMap from .types import PyThread class CoreFileAnalyzer: - @classmethod - def __init__(cls, *args: Any, **kwargs: Any) -> None: ... 
+ def __init__( + self, + core_file: Union[str, pathlib.Path], + executable: Optional[Union[str, pathlib.Path]] = None, + lib_search_path: Optional[str] = None, + ) -> None: ... def extract_module_load_points(self) -> Dict[str, int]: ... def extract_build_ids(self) -> Iterable[Tuple[str, str, str]]: ... - def extract_executable(self) -> pathlib.Path: ... + def extract_executable(self) -> str: ... def extract_failure_info(self) -> Dict[str, Any]: ... - def extract_maps(self) -> Iterable[VirtualMap]: ... + def extract_maps(self) -> List[Dict[str, Any]]: ... def extract_pid(self) -> int: ... def extract_ps_info(self) -> Dict[str, Any]: ... def missing_modules(self) -> List[str]: ... class NativeReportingMode(enum.Enum): - ALL = ... - OFF = ... - PYTHON = ... - LAST = ... + OFF = 0 + PYTHON = 1 + ALL = 1000 + LAST = 2000 class StackMethod(enum.Enum): - ALL = 1 - ANONYMOUS_MAPS = 2 - AUTO = 3 + ELF_DATA = 1 + SYMBOLS = 2 BSS = 4 - ELF_DATA = 5 - HEAP = 6 - SYMBOLS = 7 - DEBUG_OFFSETS = 8 + ANONYMOUS_MAPS = 8 + HEAP = 16 + DEBUG_OFFSETS = 32 + AUTO = 39 # DEBUG_OFFSETS | ELF_DATA | SYMBOLS | BSS + ALL = 63 # AUTO | ANONYMOUS_MAPS | HEAP + +class ProcessManager: + pid: int + python_version: Tuple[int, int] -class ProcessManager: ... + @classmethod + def create_from_pid( + cls, pid: int, stop_process: bool = True + ) -> "ProcessManager": ... + @classmethod + def create_from_core( + cls, + core_file: Union[str, pathlib.Path], + executable: Union[str, pathlib.Path], + lib_search_path: Optional[str] = None, + ) -> "ProcessManager": ... + def interpreter_status(self) -> int: ... + def is_interpreter_active(self) -> bool: ... + def __enter__(self) -> "ProcessManager": ... + def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: ... def get_process_threads( pid: int, @@ -47,16 +70,19 @@ def get_process_threads( native_mode: NativeReportingMode = NativeReportingMode.OFF, locals: bool = False, method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: ... 
+) -> List[PyThread]: ... def get_process_threads_for_core( - core_file: pathlib.Path, - executable: pathlib.Path, + core_file: Union[str, pathlib.Path], + executable: Union[str, pathlib.Path], library_search_path: Optional[str] = None, native_mode: NativeReportingMode = NativeReportingMode.PYTHON, locals: bool = False, method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: ... -def get_bss_info(binary: Union[str, pathlib.Path]) -> Dict[str, Any]: ... -def copy_memory_from_address( - pid: int, address: int, size: int, blocking: bool = False -) -> bytes: ... +) -> List[PyThread]: ... +def get_bss_info(binary: Union[str, pathlib.Path]) -> Optional[Dict[str, Any]]: ... +def copy_memory_from_address(pid: int, address: int, size: int) -> bytes: ... +def _check_interpreter_shutdown(manager: ProcessManager) -> None: ... + +F = TypeVar("F", bound=Callable[..., Any]) + +def intercept_runtime_errors() -> Callable[[F], F]: ... diff --git a/src/pystack/_pystack.pyx b/src/pystack/_pystack.pyx deleted file mode 100644 index de16701d..00000000 --- a/src/pystack/_pystack.pyx +++ /dev/null @@ -1,782 +0,0 @@ -import contextlib -import enum -import functools -import logging -import os -import pathlib -from typing import Any -from typing import Callable -from typing import Dict -from typing import Iterable -from typing import List -from typing import Optional -from typing import Set -from typing import Tuple -from typing import TypeVar - -from cython.operator import dereference -from cython.operator import postincrement - -from _pystack.corefile cimport CoreFileExtractor -from _pystack.elf_common cimport CoreFileAnalyzer as NativeCoreFileAnalyzer -from _pystack.elf_common cimport ProcessAnalyzer as NativeProcessAnalyzer -from _pystack.elf_common cimport SectionInfo -from _pystack.elf_common cimport getSectionInfo -from _pystack.logging cimport initializePythonLoggerInterface -from _pystack.mem cimport AbstractRemoteMemoryManager -from _pystack.mem cimport 
MemoryMapInformation as CppMemoryMapInformation -from _pystack.mem cimport ProcessMemoryManager -from _pystack.mem cimport VirtualMap as CppVirtualMap -from _pystack.process cimport AbstractProcessManager -from _pystack.process cimport CoreFileProcessManager -from _pystack.process cimport InterpreterStatus -from _pystack.process cimport ProcessManager as NativeProcessManager -from _pystack.process cimport ProcessTracer -from _pystack.process cimport remote_addr_t -from _pystack.pycode cimport CodeObject -from _pystack.pyframe cimport FrameObject -from _pystack.pythread cimport NativeThread -from _pystack.pythread cimport Thread -from _pystack.pythread cimport getThreadFromInterpreterState -from cpython.unicode cimport PyUnicode_Decode -from libcpp.memory cimport make_shared -from libcpp.memory cimport make_unique -from libcpp.memory cimport shared_ptr -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string as cppstring -from libcpp.unordered_map cimport unordered_map -from libcpp.vector cimport vector - -from .errors import CoreExecutableNotFound -from .errors import EngineError -from .errors import InvalidPythonProcess -from .errors import NotEnoughInformation -from .maps import MemoryMapInformation -from .maps import VirtualMap -from .maps import generate_maps_for_process -from .maps import generate_maps_from_core_data -from .maps import parse_maps_file -from .maps import parse_maps_file_for_binary -from .process import get_python_version_for_core -from .process import get_python_version_for_process -from .process import get_thread_name -from .types import LocationInfo -from .types import NativeFrame -from .types import PyCodeObject -from .types import PyFrame -from .types import PyThread - -LOGGER = logging.getLogger(__file__) - -initializePythonLoggerInterface() - - -class StackMethod(enum.Enum): - ELF_DATA = 1 << 0 - SYMBOLS = 1 << 1 - BSS = 1 << 2 - ANONYMOUS_MAPS = 1 << 3 - HEAP = 1 << 4 - DEBUG_OFFSETS = 1 << 5 - AUTO = DEBUG_OFFSETS | 
ELF_DATA | SYMBOLS | BSS - ALL = AUTO | ANONYMOUS_MAPS | HEAP - - -class NativeReportingMode(enum.Enum): - OFF = 0 - PYTHON = 1 - ALL = 1000 - LAST = 2000 - - -cdef api void log_with_python(const cppstring *message, int level) noexcept: - pymessage = _try_to_decode_string(message) - LOGGER.log(level, pymessage) - -T = TypeVar("T", bound=Callable[..., Any]) - - -class intercept_runtime_errors: - def __init__(self, exception=EngineError): - self.exception = exception - - def __call__(self, func: T) -> T: - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: - try: - return func(*args, **kwargs) - except RuntimeError as e: - raise self.exception(*e.args) from e - - return wrapper - - -@intercept_runtime_errors(EngineError) -def copy_memory_from_address(pid, address, size): - cdef shared_ptr[AbstractRemoteMemoryManager] manager - cdef int the_pid = pid - cdef vector[int] tids - manager = ( - make_shared[ProcessMemoryManager](the_pid) - ) - - cdef AbstractRemoteMemoryManager *manager_handle = manager.get() - - memory = bytearray(size) - cdef char *buffer = memory - cdef remote_addr_t _address = address - manager_handle.copyMemoryFromProcess(_address, size, buffer) - manager.reset() - return memory - - -cdef CppVirtualMap _pymap_to_map(pymap: VirtualMap) except *: - default_path = "" - assert pymap is not None - return CppVirtualMap( - pymap.start, - pymap.end, - pymap.filesize, - pymap.flags, - pymap.offset, - pymap.device, - pymap.inode, - str(pymap.path) if pymap.path else default_path, - ) - - -cdef CppMemoryMapInformation _pymapinfo_to_mapinfo(map_info: MemoryMapInformation): - interpreter_map = ( - map_info.libpython if map_info.libpython is not None else map_info.python - ) - cdef CppMemoryMapInformation cppmap_info - assert(interpreter_map is not None) - cppmap_info.setMainMap(_pymap_to_map(interpreter_map)) - if map_info.bss: - cppmap_info.setBss(_pymap_to_map(map_info.bss)) - if map_info.heap: - 
cppmap_info.setHeap(_pymap_to_map(map_info.heap)) - - return cppmap_info - - -cdef vector[CppVirtualMap] _pymaps_to_maps(pymaps: Iterable[VirtualMap]) except *: - cdef vector[CppVirtualMap] native_maps - for pymap in pymaps: - native_maps.push_back(_pymap_to_map(pymap)) - return native_maps - - -def get_bss_info(binary): - cdef SectionInfo _result - if getSectionInfo(str(binary), b".bss", &_result): - result = _result - return result - return None - -###################### -# MANAGEMENT CLASSES # -###################### - -cdef shared_ptr[NativeCoreFileAnalyzer] get_core_analyzer( - core_file, executable=None, lib_search_path=None -) except *: - cdef shared_ptr[NativeCoreFileAnalyzer] analyzer; - cdef cppstring the_core_file, the_executable, the_lib_search_path - the_core_file = str(core_file) - if executable is not None and lib_search_path is not None: - the_executable = str(executable) - the_lib_search_path = str(lib_search_path) - analyzer = make_shared[NativeCoreFileAnalyzer]( - the_core_file, the_executable, the_lib_search_path - ) - elif executable is not None and lib_search_path is None: - the_executable = str(executable) - analyzer = make_shared[NativeCoreFileAnalyzer](the_core_file, the_executable) - else: - analyzer = make_shared[NativeCoreFileAnalyzer](the_core_file) - return analyzer - - -cdef class CoreFileAnalyzer: - cdef shared_ptr[CoreFileExtractor] _core_analyzer - cdef object ignored_libs - - def __cinit__(self, core_file, executable=None, lib_search_path=None): - self.ignored_libs = frozenset(("ld-linux", "linux-vdso")) - self._initialize_core_analyzer(core_file, executable, lib_search_path) - - @intercept_runtime_errors(EngineError) - def _initialize_core_analyzer(self, core_file, executable, lib_search_path) -> None: - cdef shared_ptr[NativeCoreFileAnalyzer] analyzer = get_core_analyzer( - core_file, executable, lib_search_path - ) - self._core_analyzer = make_shared[CoreFileExtractor](analyzer) - - @intercept_runtime_errors(EngineError) - def 
extract_maps(self) -> Iterable[VirtualMap]: - mapped_files = self._core_analyzer.get().extractMappedFiles() - memory_maps = self._core_analyzer.get().MemoryMaps() - return generate_maps_from_core_data(mapped_files, memory_maps) - - @intercept_runtime_errors(EngineError) - def extract_pid(self) -> int: - return self._core_analyzer.get().Pid() - - @intercept_runtime_errors(CoreExecutableNotFound) - def extract_executable(self) -> pathlib.Path: - return pathlib.Path(self._core_analyzer.get().extractExecutable()) - - @intercept_runtime_errors(EngineError) - def extract_failure_info(self) -> Dict[str, Any]: - return self._core_analyzer.get().extractFailureInfo() - - @intercept_runtime_errors(EngineError) - def extract_ps_info(self) -> Dict[str, Any]: - return self._core_analyzer.get().extractPSInfo() - - cdef _is_ignored_lib(self, object path): - return any(prefix in str(path) for prefix in self.ignored_libs) - - @intercept_runtime_errors(EngineError) - def missing_modules(self) -> Set[str]: - cdef set result = set() - cdef set missing_mod_names = set() - for mod in self._core_analyzer.get().missingModules(): - path = pathlib.Path(mod) - if not self._is_ignored_lib(path): - result.add(path) - missing_mod_names.add(path.name) - for memmap in self._core_analyzer.get().MemoryMaps(): - path = pathlib.Path(memmap.path) - if path.exists() or self._is_ignored_lib(path): - continue - if path.name not in missing_mod_names: - result.add(path) - return result - - @intercept_runtime_errors(EngineError) - def extract_module_load_points(self) -> Dict[str, int]: - return { - pathlib.Path(mod.filename).name: mod.start - for mod in self._core_analyzer.get().ModuleInformation() - } - - @intercept_runtime_errors(EngineError) - def extract_build_ids(self) -> Tuple[str, str, str]: - cdef object memory_maps = self._core_analyzer.get().MemoryMaps() - cdef object module_information = self._core_analyzer.get().ModuleInformation() - memory_maps_by_file = {map['path']: map['buildid'] for map in 
memory_maps} - for module in module_information: - filename = module['filename'] - if self._is_ignored_lib(filename): - continue - mod_buildid = module['buildid'] - map_buildid = memory_maps_by_file.get(filename) - yield (filename, mod_buildid, map_buildid) - -cdef class ProcessManager: - cdef shared_ptr[AbstractProcessManager] _manager - - cdef public object pid - cdef public object python_version - cdef public object virtual_maps - cdef public object map_info - - def __init__(self, pid, python_version, memory_maps, map_info): - self.pid = pid - self.python_version = python_version - self.virtual_maps = memory_maps - self.map_info = map_info - - @classmethod - def create_from_pid(cls, int pid, bint stop_process): - cdef shared_ptr[ProcessTracer] tracer - if stop_process: - tracer = make_shared[ProcessTracer](pid) - - virtual_maps = list(generate_maps_for_process(pid)) - map_info = parse_maps_file(pid, virtual_maps) - - cdef shared_ptr[NativeProcessAnalyzer] analyzer = make_shared[ - NativeProcessAnalyzer - ](pid) - cdef shared_ptr[AbstractProcessManager] native_manager = ( - make_shared[NativeProcessManager]( - pid, tracer, analyzer, - _pymaps_to_maps(virtual_maps), - _pymapinfo_to_mapinfo(map_info), - ) - ) - - native_manager.get().setPythonVersionFromDebugOffsets() - python_version = native_manager.get().findPythonVersion() - if python_version == (-1, -1): - python_version = get_python_version_for_process(pid, map_info) - native_manager.get().setPythonVersion(python_version) - - cdef ProcessManager new_manager = cls( - pid, python_version, virtual_maps, map_info - ) - new_manager._manager = native_manager - return new_manager - - @classmethod - def create_from_core( - cls, - core_file: pathlib.Path, - executable: pathlib.Path, - lib_search_path: Optional[pathlib.Path], - ): - cdef shared_ptr[NativeCoreFileAnalyzer] analyzer = get_core_analyzer( - core_file, executable, lib_search_path - ) - cdef unique_ptr[CoreFileExtractor] core_extractor = make_unique[ - 
CoreFileExtractor - ](analyzer) - - mapped_files = core_extractor.get().extractMappedFiles() - memory_maps = core_extractor.get().MemoryMaps() - load_point_by_module = { - pathlib.Path(mod.filename).name: mod.start - for mod in core_extractor.get().ModuleInformation() - } - - virtual_maps = list( - generate_maps_from_core_data(mapped_files, memory_maps) - ) - pid = core_extractor.get().Pid() - map_info = parse_maps_file_for_binary(executable, virtual_maps, load_point_by_module) - - the_core_file = str(core_file) - the_executable = str(executable) - maps = _pymaps_to_maps(virtual_maps) - native_map_info = _pymapinfo_to_mapinfo(map_info) - cdef shared_ptr[AbstractProcessManager] native_manager = ( - make_shared[CoreFileProcessManager](pid, analyzer, maps, native_map_info) - ) - - native_manager.get().setPythonVersionFromDebugOffsets() - python_version = native_manager.get().findPythonVersion() - if python_version == (-1, -1): - python_version = get_python_version_for_core(core_file, executable, map_info) - native_manager.get().setPythonVersion(python_version) - - cdef ProcessManager new_manager = cls( - pid, python_version, virtual_maps, map_info - ) - new_manager._manager = native_manager - - return new_manager - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self._manager.reset() - - cdef shared_ptr[AbstractProcessManager] get_manager(self): - assert self._manager.get() != NULL - return self._manager - - def interpreter_status(self) -> int: - return self._manager.get().isInterpreterActive() - - def is_interpreter_active(self) -> bool: - return self._manager.get().isInterpreterActive() == InterpreterStatus.RUNNING - - -###################################### -# COMMON STACK-RETRIEVING FUNCTIONS # -###################################### - -cdef object _try_to_decode_string(const cppstring *the_string): - return PyUnicode_Decode(the_string.c_str(), the_string.size(), NULL, "replace") - -cdef object 
_safe_cppmap_to_py(unordered_map[cppstring, cppstring] themap): - cdef unordered_map[cppstring, cppstring] . iterator it = themap.begin() - cdef dict result = {} - while it != themap.end(): - key = _try_to_decode_string(&(dereference(it).first)) - val = _try_to_decode_string(&(dereference(it).second)) - result[key] = val - postincrement(it) - - return result - -cdef object _construct_frame_stack_from_thread_object( - ssize_t pid, bint resolve_locals, FrameObject *first_frame -): - cdef CodeObject *current_code = NULL - cdef FrameObject *current_frame = first_frame - - last_frame = None - - while current_frame != NULL: - current_code = current_frame.Code().get() - - if not current_code: - current_frame = ( - current_frame.PreviousFrame().get() - if current_frame.PreviousFrame() - else NULL - ) - continue - - filename = current_code.Filename() - location_info = LocationInfo( - current_code.Location().lineno, - current_code.Location().end_lineno, - current_code.Location().column, - current_code.Location().end_column, - ) - py_code = PyCodeObject(filename, current_code.Scope(), location_info) - - if resolve_locals: - current_frame.resolveLocalVariables() - - args = _safe_cppmap_to_py(current_frame.Arguments()) - locals = _safe_cppmap_to_py(current_frame.Locals()) - is_entry = current_frame.IsEntryFrame() - is_shim = current_frame.IsShim() - py_frame = PyFrame(None, None, py_code, args, locals, is_entry, is_shim) - - py_frame.next = last_frame - if last_frame: - last_frame.prev = py_frame - - last_frame = py_frame - current_frame = ( - current_frame.PreviousFrame().get() - if current_frame.PreviousFrame() - else NULL - ) - - return last_frame - -cdef object _construct_threads_from_interpreter_state( - shared_ptr[AbstractProcessManager] manager, - remote_addr_t head, - int pid, - object python_version, - bint add_native_traces, - bint resolve_locals, -): - LOGGER.info("Fetching Python threads") - threads = [] - - cdef shared_ptr[Thread] thread = 
getThreadFromInterpreterState(manager, head) - cdef Thread *current_thread = thread.get() - while current_thread != NULL: - LOGGER.info("Constructing new Python thread with tid %s", current_thread.Tid()) - if add_native_traces: - current_thread.populateNativeStackTrace(manager) - frame = _construct_frame_stack_from_thread_object( - pid, resolve_locals, current_thread.FirstFrame().get() - ) - native_frames = [ - NativeFrame(**native_frame) - for native_frame in list(current_thread.NativeFrames()) - ] - threads.append( - PyThread( - current_thread.Tid(), - frame, - native_frames[::-1], - current_thread.isGilHolder(), - current_thread.isGCCollecting(), - python_version, - name=get_thread_name(pid, current_thread.Tid()), - ) - ) - current_thread = ( - current_thread.NextThread().get() if current_thread.NextThread() else NULL - ) - - return threads - -cdef object _construct_os_thread( - shared_ptr[AbstractProcessManager] manager, int pid, int tid -): - cdef unique_ptr[NativeThread] thread = make_unique[NativeThread](pid, tid) - thread.get().populateNativeStackTrace(manager) - native_frames = [ - NativeFrame(**native_frame) - for native_frame in list(thread.get().NativeFrames()) - ] - LOGGER.info("Constructing new native thread with tid %s", tid) - pythread = PyThread( - tid, - None, - native_frames[::-1], - False, - False, - None, - name=get_thread_name(pid, tid), - ) - - return pythread - -cdef object _construct_os_threads( - shared_ptr[AbstractProcessManager] manager, int pid, object tids -): - LOGGER.info("Fetching native threads") - threads = [] - for tid in tids: - threads.append(_construct_os_thread(manager, pid, tid)) - - return threads - -cdef remote_addr_t _get_interpreter_state_addr( - AbstractProcessManager *manager, object method, int core=False -) except*: - cdef remote_addr_t head = 0 - possible_methods = [ - StackMethod.DEBUG_OFFSETS, - StackMethod.ELF_DATA, - StackMethod.SYMBOLS, - StackMethod.BSS, - StackMethod.ANONYMOUS_MAPS, - StackMethod.HEAP, - ] - 
- for possible_method in possible_methods: - if method.value & possible_method.value == 0: - continue - - try: - if possible_method == StackMethod.DEBUG_OFFSETS: - how = "using debug offsets data" - head = manager.findInterpreterStateFromDebugOffsets() - elif possible_method == StackMethod.ELF_DATA: - how = "using ELF data" - head = manager.findInterpreterStateFromElfData() - elif possible_method == StackMethod.SYMBOLS: - how = "using symbols" - head = manager.findInterpreterStateFromSymbols() - elif possible_method == StackMethod.BSS: - how = "scanning the BSS" - head = manager.scanBSS() - elif possible_method == StackMethod.ANONYMOUS_MAPS: - how = "scanning all anonymous maps" - head = manager.scanAllAnonymousMaps() - elif possible_method == StackMethod.HEAP: - how = "scanning the heap" - head = manager.scanHeap() - except Exception as exc: - LOGGER.warning( - "Unexpected error finding PyInterpreterState by %s: %s", how, exc - ) - - if head: - LOGGER.info("PyInterpreterState found by %s at address 0x%0.2X", how, head) - return head - else: - LOGGER.info("Address of PyInterpreterState not found by %s", how) - - LOGGER.info("Address of PyInterpreterState could not be found") - return 0 - - -def _check_interpreter_shutdown(manager): - status = manager.interpreter_status() - if status == InterpreterStatus.UNKNOWN: - return - if status == InterpreterStatus.FINALIZED: - msg = ( - "The interpreter is shutting itself down so it is possible that no Python" - " stack trace is available for inspection. You can still use --native-all " - " to force displaying all the threads." 
- ) - LOGGER.warning(msg) - else: - LOGGER.info("An active interpreter has been detected") - - -##################### -# PROCESS FUNCTIONS # -##################### - - -def _get_process_threads( - pymanager: ProcessManager, - pid: int, - native_mode: NativeReportingMode, - resolve_locals: bool, - method: StackMethod, -): - LOGGER.debug("Available memory maps for process:") - for mem_map in pymanager.virtual_maps: - LOGGER.debug(mem_map) - - cdef shared_ptr[AbstractProcessManager] manager = pymanager.get_manager() - - if native_mode != NativeReportingMode.ALL: - _check_interpreter_shutdown(pymanager) - - cdef remote_addr_t head = _get_interpreter_state_addr(manager.get(), method) - - if not head and native_mode != NativeReportingMode.ALL: - raise NotEnoughInformation( - "Could not gather enough information to extract the Python frame information" - ) - - all_tids = list(manager.get().Tids()) - if head: - add_native_traces = native_mode != NativeReportingMode.OFF - for thread in _construct_threads_from_interpreter_state( - manager, - head, - pid, - pymanager.python_version, - add_native_traces, - resolve_locals, - ): - if thread.tid in all_tids: - all_tids.remove(thread.tid) - yield thread - - if native_mode == NativeReportingMode.ALL: - yield from _construct_os_threads(manager, pid, all_tids) - - -def get_process_threads( - pid: int, - stop_process: bool = True, - native_mode: NativeReportingMode = NativeReportingMode.OFF, - locals: bool = False, - method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: - """Return an iterable of Thread objects that are registered with the remote interpreter - - Args: - pid (int): The pid of the remote process - stop_process (bool): If *True*, stop the process for analysis and use - blocking APis to obtain remote information. - native_mode (NativeReportingMode): If set to PYTHON, include the - native (C/C++) stack in the returned Thread objects for all threads - registered with the interpreter. 
If set to ALL, native stacks - from threads not registered with the interpreter will be provided - as well. By default this is set to OFF and native stacks are not - returned. - locals (bool): If **True**, retrieve the local variables and arguments for - every retrieved frame (may slow down the processing). - method (StackMethod): The method to locate the relevant Python structs - that are needed to unwind the Python stack. - - Returns: - Iterable of Thread objects. - """ - if not isinstance(method, StackMethod): - raise ValueError("Invalid method for stack analysis") - - LOGGER.info( - "Analyzing process with pid %s using stack method %s with native mode %s", - pid, - method, - native_mode, - ) - - try: - with ProcessManager.create_from_pid(pid, stop_process) as manager: - yield from _get_process_threads(manager, pid, native_mode, locals, method) - except RuntimeError as e: - raise EngineError(*e.args, pid=pid) from e - - -###################### -# COREFILE FUNCTIONS # -###################### - - -def get_process_threads_for_core( - core_file: pathlib.Path, - executable: pathlib.Path, - library_search_path: str = None, - native_mode: NativeReportingMode = NativeReportingMode.PYTHON, - locals: bool = False, - method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: - """Return an iterable of Thread objects that are registered with the given core file - - Args: - core_file (pathlib.Path): The location of the core file to analyze. - executable (pathlib.Path): The location of the executable that the core file - was created from. - library_search_path (str): A ":"-separated list of directories to use when - trying to locate missing shared libraries in the core file. - native_mode (NativeReportingMode): If set to PYTHON, include the - native (C/C++) stack in the returned Thread objects for all threads - registered with the interpreter. If set to ALL, native stacks - from threads not registered with the interpreter will be provided - as well. 
By default this is set to OFF and native stacks are not - returned. - locals (bool): If **True**, retrieve the local variables and arguments for - every retrieved frame (may slow down the processing). - method (StackMethod): The method to locate the relevant Python structs - that are needed to unwind the Python stack. - - Returns: - Iterable of Thread objects. - """ - if not isinstance(method, StackMethod): - raise ValueError("Invalid method for stack analysis") - - LOGGER.info( - "Analyzing core file %s with executable %s using stack method %s with native mode %s", - core_file, - executable, - method, - native_mode, - ) - try: - yield from _get_process_threads_for_core( - core_file, executable, library_search_path, native_mode, locals, method - ) - except RuntimeError as e: - raise EngineError(*e.args, corefile=core_file) from e - - -def _get_process_threads_for_core( - corefile: pathlib.Path, - executable: pathlib.Path, - library_search_path: str = None, - native_mode: NativeReportingMode = NativeReportingMode.PYTHON, - locals: bool = False, - method: StackMethod = StackMethod.AUTO, -) -> Iterable[PyThread]: - cdef ProcessManager pymanager = ProcessManager.create_from_core( - corefile, executable, library_search_path - ) - - LOGGER.debug("Available memory maps for core:") - for mem_map in pymanager.virtual_maps: - LOGGER.debug(mem_map) - - cdef shared_ptr[AbstractProcessManager] manager = pymanager.get_manager() - - if native_mode != NativeReportingMode.ALL: - _check_interpreter_shutdown(pymanager) - - cdef remote_addr_t head = _get_interpreter_state_addr( - manager.get(), method, core=True - ) - - if not head and native_mode != NativeReportingMode.ALL: - raise NotEnoughInformation( - "Could not gather enough information to extract the Python frame information" - ) - - all_tids = list(manager.get().Tids()) - - if head: - native = native_mode in {NativeReportingMode.PYTHON, NativeReportingMode.ALL} - for thread in _construct_threads_from_interpreter_state( - 
manager, head, pymanager.pid, pymanager.python_version, native, locals - ): - if thread.tid in all_tids: - all_tids.remove(thread.tid) - yield thread - - if native_mode == NativeReportingMode.ALL: - yield from _construct_os_threads(manager, pymanager.pid, all_tids) diff --git a/src/pystack/_pystack/CMakeLists.txt b/src/pystack/_pystack/CMakeLists.txt index 5a0fd8a8..3621e1fa 100644 --- a/src/pystack/_pystack/CMakeLists.txt +++ b/src/pystack/_pystack/CMakeLists.txt @@ -1,26 +1,69 @@ -cmake_minimum_required(VERSION 2.8) -project(_pystack) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -find_package(PythonInterp 3.7 REQUIRED) -find_package(PythonLibs 3.7 REQUIRED) -IF(NOT PYTHONLIBS_FOUND OR NOT PYTHON_EXECUTABLE) - MESSAGE(SEND_ERROR "You need Python to build Python binding") -ENDIF(NOT PYTHONLIBS_FOUND OR NOT PYTHON_EXECUTABLE) - -add_library(_pystack STATIC - corefile.cpp - unwinder.cpp - logging.cpp - mem.cpp - process.cpp - pycode.cpp - pyframe.cpp - pythread.cpp - version.cpp - elf_common.cpp - pytypes.cpp) -set_property(TARGET _pystack PROPERTY POSITION_INDEPENDENT_CODE ON) -include_directories("." "cpython" ${PYTHON_INCLUDE_DIRS}) +# PyStack C++ extension module via nanobind + +# Find pthreads +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +# Collect all C++ source files +set(PYSTACK_SOURCES + corefile.cpp + elf_common.cpp + logging.cpp + maps_parser.cpp + mem.cpp + process.cpp + pycode.cpp + pyframe.cpp + pythread.cpp + pytypes.cpp + thread_builder.cpp + unwinder.cpp + version.cpp + version_detector.cpp + bindings.cpp +) + +# Create the nanobind module +nanobind_add_module( + _pystack + STABLE_ABI + NB_STATIC + ${PYSTACK_SOURCES} +) + +# Include directories +# Note: We only include the source directory, not cpython/ directly. +# The cpython headers are included with "cpython/..." 
prefix to avoid +# conflicts with system headers (e.g., cpython/pthread.h vs /usr/include/pthread.h) +target_include_directories(_pystack PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${LIBELF_INCLUDE_DIRS} + ${LIBDW_INCLUDE_DIRS} +) + +# Link libraries +target_link_libraries(_pystack PRIVATE + ${LIBELF_LIBRARIES} + ${LIBDW_LIBRARIES} + Threads::Threads + dl + stdc++fs +) + +# Compiler definitions +target_compile_definitions(_pystack PRIVATE + ${LIBELF_CFLAGS_OTHER} + ${LIBDW_CFLAGS_OTHER} +) + +# Add pthread compile options (needed for C++ threading support on Linux) +target_compile_options(_pystack PRIVATE -pthread) + +# Link directories +target_link_directories(_pystack PRIVATE + ${LIBELF_LIBRARY_DIRS} + ${LIBDW_LIBRARY_DIRS} +) + +# Install the module (destination is relative to wheel.install-dir in pyproject.toml) +install(TARGETS _pystack LIBRARY DESTINATION .) diff --git a/src/pystack/_pystack/__init__.pxd b/src/pystack/_pystack/__init__.pxd deleted file mode 100644 index e69de29b..00000000 diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp new file mode 100644 index 00000000..14a6ba86 --- /dev/null +++ b/src/pystack/_pystack/bindings.cpp @@ -0,0 +1,791 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "corefile.h" +#include "elf_common.h" +#include "logging.h" +#include "maps_parser.h" +#include "mem.h" +#include "process.h" +#include "thread_builder.h" + +namespace nb = nanobind; +using namespace nb::literals; +// Note: We don't use "using namespace pystack;" because it conflicts with Python's PyObject + +// Simple exception classes that store the message +class NotEnoughInformationError : public std::exception +{ + public: + explicit NotEnoughInformationError(const std::string& message) + : d_message(message) + { + } + const char* what() const noexcept override + { + return d_message.c_str(); + } + + private: + std::string d_message; 
+}; + +class EngineError : public std::exception +{ + public: + explicit EngineError(const std::string& message) + : d_message(message) + { + } + const char* what() const noexcept override + { + return d_message.c_str(); + } + + private: + std::string d_message; +}; + +[[noreturn]] void +raise_not_enough_information(const char* message) +{ + throw NotEnoughInformationError(message); +} + +// StackMethod enum values (must match Python enum) +enum class StackMethod { + ELF_DATA = 1 << 0, + SYMBOLS = 1 << 1, + BSS = 1 << 2, + ANONYMOUS_MAPS = 1 << 3, + HEAP = 1 << 4, + DEBUG_OFFSETS = 1 << 5, + AUTO = (1 << 5) | (1 << 0) | (1 << 1) | (1 << 2), + ALL = (1 << 5) | (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4), +}; + +enum class NativeReportingMode { + OFF = 0, + PYTHON = 1, + ALL = 1000, + LAST = 2000, +}; + +class CoreFileAnalyzerWrapper +{ + public: + CoreFileAnalyzerWrapper( + const std::filesystem::path& corefile, + std::optional executable = std::nullopt, + std::optional lib_search_path = std::nullopt) + : d_ignored_libs({"ld-linux", "linux-vdso"}) + { + std::string corefile_str = corefile.string(); + if (executable && lib_search_path) { + d_analyzer = std::make_shared( + corefile_str, + executable->string(), + lib_search_path->string()); + } else if (executable) { + d_analyzer = std::make_shared(corefile_str, executable->string()); + } else { + d_analyzer = std::make_shared(corefile_str); + } + d_extractor = std::make_unique(d_analyzer); + } + + nb::list extract_maps() + { + auto mapped_files = d_extractor->extractMappedFiles(); + auto memory_maps = d_extractor->MemoryMaps(); + auto maps = parseCoreFileMaps(mapped_files, memory_maps); + + nb::module_ pystack_maps = nb::module_::import_("pystack.maps"); + nb::object VirtualMap = pystack_maps.attr("VirtualMap"); + + nb::list result; + for (const auto& map : maps) { + std::string path_str = map.Path(); + nb::object path_obj = + path_str.empty() ? 
nb::none() : nb::cast(std::filesystem::path(path_str)); + nb::object vm = VirtualMap( + map.Start(), + map.End(), + map.FileSize(), + map.Offset(), + map.Device(), + map.Flags(), + map.Inode(), + path_obj); + result.append(vm); + } + return result; + } + + int extract_pid() + { + return d_extractor->Pid(); + } + + std::filesystem::path extract_executable() + { + return std::filesystem::path(d_extractor->extractExecutable()); + } + + nb::dict extract_failure_info() + { + auto info = d_extractor->extractFailureInfo(); + nb::dict result; + result["si_signo"] = info.si_signo; + result["si_errno"] = info.si_errno; + result["si_code"] = info.si_code; + result["sender_pid"] = info.sender_pid; + result["sender_uid"] = info.sender_uid; + result["failed_addr"] = info.failed_addr; + return result; + } + + nb::dict extract_ps_info() + { + auto info = d_extractor->extractPSInfo(); + nb::dict result; + result["state"] = static_cast(info.state); + result["sname"] = static_cast(info.sname); + result["zomb"] = static_cast(info.zomb); + result["nice"] = static_cast(info.nice); + result["flag"] = info.flag; + result["uid"] = info.uid; + result["gid"] = info.gid; + result["pid"] = info.pid; + result["ppid"] = info.ppid; + result["pgrp"] = info.pgrp; + result["sid"] = info.sid; + result["fname"] = std::string(info.fname); + result["psargs"] = std::string(info.psargs); + return result; + } + + std::vector missing_modules() + { + std::vector result; + for (const auto& mod : d_extractor->missingModules()) { + if (!isIgnoredLib(mod)) { + result.push_back(mod); + } + } + for (const auto& memmap : d_extractor->MemoryMaps()) { + std::string path = memmap.path; + if (path.empty() || isIgnoredLib(path)) { + continue; + } + // Check if path exists + std::ifstream f(path); + if (f.good()) { + continue; + } + // Check if already in result + auto fname = std::filesystem::path(path).filename().string(); + bool found = false; + for (const auto& r : result) { + if 
(std::filesystem::path(r).filename().string() == fname) { + found = true; + break; + } + } + if (!found) { + result.push_back(path); + } + } + return result; + } + + nb::dict extract_module_load_points() + { + nb::dict result; + for (const auto& mod : d_extractor->ModuleInformation()) { + auto name = std::filesystem::path(mod.filename).filename().string(); + result[nb::cast(name)] = mod.start; + } + return result; + } + + nb::list extract_build_ids() + { + nb::list result; + auto memory_maps = d_extractor->MemoryMaps(); + auto module_info = d_extractor->ModuleInformation(); + + std::unordered_map maps_by_file; + for (const auto& map : memory_maps) { + maps_by_file[map.path] = map.buildid; + } + + for (const auto& mod : module_info) { + if (isIgnoredLib(mod.filename)) { + continue; + } + auto map_buildid_it = maps_by_file.find(mod.filename); + std::string map_buildid = + (map_buildid_it != maps_by_file.end()) ? map_buildid_it->second : ""; + result.append(nb::make_tuple(mod.filename, mod.buildid, map_buildid)); + } + return result; + } + + private: + bool isIgnoredLib(const std::string& path) + { + for (const auto& prefix : d_ignored_libs) { + if (path.find(prefix) != std::string::npos) { + return true; + } + } + return false; + } + + std::shared_ptr d_analyzer; + std::unique_ptr d_extractor; + std::vector d_ignored_libs; +}; + +class ProcessManagerWrapper +{ + public: + explicit ProcessManagerWrapper(std::shared_ptr manager) + : d_manager(std::move(manager)) + { + } + + static std::unique_ptr create_from_pid(pid_t pid, bool stop_process) + { + auto manager = pystack::ProcessManager::create(pid, stop_process); + return std::make_unique(std::move(manager)); + } + + static std::unique_ptr create_from_core( + const std::filesystem::path& core_file, + const std::filesystem::path& executable, + std::optional lib_search_path) + { + std::optional lib_path_str; + if (lib_search_path) { + lib_path_str = lib_search_path->string(); + } + auto manager = 
pystack::CoreFileProcessManager::create( + core_file.string(), + executable.string(), + lib_path_str); + return std::make_unique(std::move(manager)); + } + + int interpreter_status() + { + return static_cast(d_manager->isInterpreterActive()); + } + + bool is_interpreter_active() + { + return d_manager->isInterpreterActive() + == pystack::AbstractProcessManager::InterpreterStatus::RUNNING; + } + + void reset() + { + d_manager.reset(); + } + + pid_t pid() const + { + return d_manager->Pid(); + } + + std::pair python_version() const + { + return d_manager->Version(); + } + + const std::vector& virtual_maps() const + { + return d_manager->MemoryMaps(); + } + + std::shared_ptr get_manager() + { + return d_manager; + } + + private: + std::shared_ptr d_manager; +}; + +nb::bytes +copy_memory_from_address(pid_t pid, uintptr_t address, size_t size) +{ + auto manager = std::make_shared(pid); + std::vector buffer(size); + manager->copyMemoryFromProcess(address, size, buffer.data()); + return nb::bytes(buffer.data(), buffer.size()); +} + +nb::object +get_bss_info(const std::filesystem::path& binary) +{ + pystack::SectionInfo info; + if (pystack::getSectionInfo(binary.string(), ".bss", &info)) { + nb::dict result; + result["name"] = info.name; + result["flags"] = info.flags; + result["addr"] = info.addr; + result["corrected_addr"] = info.corrected_addr; + result["offset"] = info.offset; + result["size"] = info.size; + return result; + } + return nb::none(); +} + +// Helper struct to hold Python type objects for thread building +struct PyTypes +{ + nb::object PyThread; + nb::object PyFrame; + nb::object PyCodeObject; + nb::object LocationInfo; + nb::object NativeFrame; + + static PyTypes load() + { + nb::module_ pystack_types = nb::module_::import_("pystack.types"); + return {pystack_types.attr("PyThread"), + pystack_types.attr("PyFrame"), + pystack_types.attr("PyCodeObject"), + pystack_types.attr("LocationInfo"), + pystack_types.attr("NativeFrame")}; + } +}; + +// Build frame 
chain from C++ thread data +nb::object +buildFrameChain(const pystack::PyThreadData& thread, const PyTypes& types) +{ + nb::object first_frame = nb::none(); + nb::object prev_frame = nb::none(); + + // Frames from C++ are in innermost-to-outermost order + // Python iterates via .next and expects: -> first_func -> second_func -> third_func + // So we iterate in reverse to build the list in the correct order + for (auto it = thread.frames.rbegin(); it != thread.frames.rend(); ++it) { + const auto& frame_data = *it; + nb::object location = types.LocationInfo( + frame_data.code.location.lineno, + frame_data.code.location.end_lineno, + frame_data.code.location.column, + frame_data.code.location.end_column); + nb::object code = types.PyCodeObject(frame_data.code.filename, frame_data.code.scope, location); + + nb::dict args; + for (const auto& [k, v] : frame_data.arguments) { + args[nb::cast(k)] = v; + } + nb::dict locs; + for (const auto& [k, v] : frame_data.locals) { + locs[nb::cast(k)] = v; + } + + nb::object py_frame = types.PyFrame( + prev_frame, + nb::none(), + code, + args, + locs, + frame_data.is_entry, + frame_data.is_shim); + + if (!prev_frame.is_none()) { + prev_frame.attr("next") = py_frame; + } + + if (first_frame.is_none()) { + first_frame = py_frame; + } + prev_frame = py_frame; + } + + return first_frame; +} + +// Build native frames list +nb::list +buildNativeFramesList(const std::vector& native_frames, const PyTypes& types) +{ + nb::list result; + for (const auto& nf : native_frames) { + result.append(types.NativeFrame( + nf.address, + nf.symbol, + nf.path, + nf.linenumber, + nf.colnumber, + nf.library)); + } + return result; +} + +// Build a Python thread object from C++ thread data +nb::object +buildPyThreadObject( + const pystack::PyThreadData& thread, + const PyTypes& types, + std::pair python_version) +{ + nb::object first_frame = buildFrameChain(thread, types); + nb::list native_frames = buildNativeFramesList(thread.native_frames, types); + + 
return types.PyThread( + thread.tid, + first_frame, + native_frames, + thread.gil_status, + thread.gc_status, + nb::make_tuple(python_version.first, python_version.second), + "name"_a = thread.name ? nb::cast(*thread.name) : nb::none()); +} + +// Build a native-only thread object (no Python frames) +nb::object +buildNativeOnlyThreadObject(const pystack::PyThreadData& thread, const PyTypes& types) +{ + nb::list native_frames = buildNativeFramesList(thread.native_frames, types); + + return types.PyThread( + thread.tid, + nb::none(), + native_frames, + 0, + 0, + nb::none(), + "name"_a = thread.name ? nb::cast(*thread.name) : nb::none()); +} + +// Log interpreter status +void +logInterpreterStatus(int status) +{ + if (status == static_cast(pystack::AbstractProcessManager::InterpreterStatus::FINALIZED)) { + pystack::LOG(pystack::WARNING) + << "The interpreter is shutting itself down so it is possible that no " + "Python stack trace is available for inspection."; + } else if (status == static_cast(pystack::AbstractProcessManager::InterpreterStatus::RUNNING)) { + pystack::LOG(pystack::INFO) << "An active interpreter has been detected"; + } +} + +// Log available memory maps +void +logMemoryMaps(const std::vector& maps, const char* source) +{ + pystack::LOG(pystack::DEBUG) << "Available memory maps for " << source << ":"; + for (const auto& map : maps) { + pystack::LOG(pystack::DEBUG) + << " " << std::hex << map.Start() << "-" << map.End() << " " << map.Path(); + } +} + +nb::object +get_process_threads( + pid_t pid, + bool stop_process, + NativeReportingMode native_mode, + bool locals, + StackMethod method) +{ + auto types = PyTypes::load(); + + try { + auto manager = ProcessManagerWrapper::create_from_pid(pid, stop_process); + logMemoryMaps(manager->virtual_maps(), "process"); + + if (native_mode != NativeReportingMode::ALL) { + logInterpreterStatus(manager->interpreter_status()); + } + + pystack::remote_addr_t head = + 
pystack::getInterpreterStateAddr(manager->get_manager().get(), static_cast(method)); + + if (head == 0 && native_mode != NativeReportingMode::ALL) { + raise_not_enough_information( + "Could not gather enough information to extract the Python frame information"); + } + + nb::list result; + std::vector all_tids = pystack::getThreadIds(manager->get_manager()); + + if (head != 0) { + bool add_native = native_mode != NativeReportingMode::OFF; + auto threads = pystack::buildThreadsFromInterpreter( + manager->get_manager(), + head, + pid, + add_native, + locals); + + for (const auto& thread : threads) { + result.append(buildPyThreadObject(thread, types, manager->python_version())); + all_tids.erase( + std::remove(all_tids.begin(), all_tids.end(), thread.tid), + all_tids.end()); + } + } + + if (native_mode == NativeReportingMode::ALL) { + for (int tid : all_tids) { + auto thread = pystack::buildNativeThread(manager->get_manager(), pid, tid); + result.append(buildNativeOnlyThreadObject(thread, types)); + } + } + + manager->reset(); + return result; + } catch (const NotEnoughInformationError&) { + throw; + } catch (const EngineError&) { + throw; + } catch (const std::exception& e) { + throw EngineError(e.what()); + } +} + +nb::object +get_process_threads_for_core( + const std::filesystem::path& core_file, + const std::filesystem::path& executable, + std::optional library_search_path, + NativeReportingMode native_mode, + bool locals, + StackMethod method) +{ + auto types = PyTypes::load(); + + try { + auto manager = + ProcessManagerWrapper::create_from_core(core_file, executable, library_search_path); + logMemoryMaps(manager->virtual_maps(), "core"); + + if (native_mode != NativeReportingMode::ALL) { + logInterpreterStatus(manager->interpreter_status()); + } + + pystack::remote_addr_t head = + pystack::getInterpreterStateAddr(manager->get_manager().get(), static_cast(method)); + + if (head == 0 && native_mode != NativeReportingMode::ALL) { + raise_not_enough_information( + 
"Could not gather enough information to extract the Python frame information"); + } + + nb::list result; + std::vector all_tids = pystack::getThreadIds(manager->get_manager()); + + if (head != 0) { + bool add_native = native_mode == NativeReportingMode::PYTHON + || native_mode == NativeReportingMode::ALL; + auto threads = pystack::buildThreadsFromInterpreter( + manager->get_manager(), + head, + manager->pid(), + add_native, + locals); + + for (const auto& thread : threads) { + result.append(buildPyThreadObject(thread, types, manager->python_version())); + all_tids.erase( + std::remove(all_tids.begin(), all_tids.end(), thread.tid), + all_tids.end()); + } + } + + if (native_mode == NativeReportingMode::ALL) { + for (int tid : all_tids) { + auto thread = pystack::buildNativeThread(manager->get_manager(), manager->pid(), tid); + result.append(buildNativeOnlyThreadObject(thread, types)); + } + } + + return result; + } catch (const NotEnoughInformationError&) { + throw; + } catch (const EngineError&) { + throw; + } catch (const std::exception& e) { + throw EngineError(e.what()); + } +} + +void +_check_interpreter_shutdown(nb::object manager) +{ + int status = nb::cast(manager.attr("interpreter_status")()); + + if (status == static_cast(pystack::AbstractProcessManager::InterpreterStatus::FINALIZED)) { + pystack::LOG(pystack::WARNING) + << "The interpreter is shutting itself down so it is possible that no " + "Python stack trace is available for inspection."; + } else if (status != -1) { + // -1 means failed to detect, 2 means FINALIZED (already handled above) + // Other values mean running/active + pystack::LOG(pystack::INFO) << "An active interpreter has been detected"; + } +} + +NB_MODULE(_pystack, m) +{ + m.doc() = "PyStack native extension module"; + + nb::register_exception_translator([](const std::exception_ptr& p, void*) { + try { + if (p) std::rethrow_exception(p); + } catch (const NotEnoughInformationError& e) { + nb::object exc_type = 
nb::module_::import_("pystack.errors").attr("NotEnoughInformation"); + PyErr_SetString(exc_type.ptr(), e.what()); + } catch (const EngineError& e) { + nb::object exc_type = nb::module_::import_("pystack.errors").attr("EngineError"); + PyErr_SetString(exc_type.ptr(), e.what()); + } + }); + + pystack::initializePythonLoggerInterface(); + + nb::enum_(m, "StackMethod", nb::is_flag()) + .value("ELF_DATA", StackMethod::ELF_DATA) + .value("SYMBOLS", StackMethod::SYMBOLS) + .value("BSS", StackMethod::BSS) + .value("ANONYMOUS_MAPS", StackMethod::ANONYMOUS_MAPS) + .value("HEAP", StackMethod::HEAP) + .value("DEBUG_OFFSETS", StackMethod::DEBUG_OFFSETS) + .value("AUTO", StackMethod::AUTO) + .value("ALL", StackMethod::ALL); + + nb::enum_(m, "NativeReportingMode") + .value("OFF", NativeReportingMode::OFF) + .value("PYTHON", NativeReportingMode::PYTHON) + .value("ALL", NativeReportingMode::ALL) + .value("LAST", NativeReportingMode::LAST); + + nb::class_(m, "CoreFileAnalyzer") + .def(nb::init< + const std::filesystem::path&, + std::optional, + std::optional>(), + "core_file"_a, + "executable"_a = nb::none(), + "lib_search_path"_a = nb::none()) + .def("extract_maps", &CoreFileAnalyzerWrapper::extract_maps) + .def("extract_pid", &CoreFileAnalyzerWrapper::extract_pid) + .def("extract_executable", &CoreFileAnalyzerWrapper::extract_executable) + .def("extract_failure_info", &CoreFileAnalyzerWrapper::extract_failure_info) + .def("extract_ps_info", &CoreFileAnalyzerWrapper::extract_ps_info) + .def("missing_modules", &CoreFileAnalyzerWrapper::missing_modules) + .def("extract_module_load_points", &CoreFileAnalyzerWrapper::extract_module_load_points) + .def("extract_build_ids", &CoreFileAnalyzerWrapper::extract_build_ids); + + nb::class_(m, "ProcessManager") + .def_static( + "create_from_pid", + &ProcessManagerWrapper::create_from_pid, + "pid"_a, + "stop_process"_a = true) + .def_static( + "create_from_core", + &ProcessManagerWrapper::create_from_core, + "core_file"_a, + "executable"_a, + 
"lib_search_path"_a = nb::none()) + .def("interpreter_status", &ProcessManagerWrapper::interpreter_status) + .def("is_interpreter_active", &ProcessManagerWrapper::is_interpreter_active) + .def_prop_ro("pid", &ProcessManagerWrapper::pid) + .def_prop_ro("python_version", &ProcessManagerWrapper::python_version) + .def( + "__enter__", + [](ProcessManagerWrapper& self) -> ProcessManagerWrapper& { return self; }, + nb::rv_policy::reference) + .def("__exit__", [](ProcessManagerWrapper& self, nb::args) { self.reset(); }); + + m.def("copy_memory_from_address", + ©_memory_from_address, + "pid"_a, + "address"_a, + "size"_a, + "Copy memory from a remote process"); + + m.def("get_bss_info", &get_bss_info, "binary"_a, "Get BSS section information from an ELF binary"); + + // Note: We use nb::arg().none() to allow None to be passed explicitly + m.def( + "get_process_threads", + [](pid_t pid, + bool stop_process, + NativeReportingMode native_mode, + bool locals, + nb::object method_obj) { + if (method_obj.is_none()) { + throw std::invalid_argument("Invalid method for stack analysis"); + } + StackMethod method = nb::cast(method_obj); + return get_process_threads(pid, stop_process, native_mode, locals, method); + }, + "pid"_a, + "stop_process"_a = true, + "native_mode"_a = NativeReportingMode::OFF, + "locals"_a = false, + nb::arg("method").none() = nb::cast(StackMethod::AUTO), + "Return an iterable of Thread objects from a live process"); + + m.def( + "get_process_threads_for_core", + [](const std::filesystem::path& core_file, + const std::filesystem::path& executable, + std::optional library_search_path, + NativeReportingMode native_mode, + bool locals, + nb::object method_obj) { + if (method_obj.is_none()) { + throw std::invalid_argument("Invalid method for stack analysis"); + } + StackMethod method = nb::cast(method_obj); + return get_process_threads_for_core( + core_file, + executable, + library_search_path, + native_mode, + locals, + method); + }, + "core_file"_a, + 
"executable"_a, + "library_search_path"_a = nb::none(), + "native_mode"_a = NativeReportingMode::PYTHON, + "locals"_a = false, + nb::arg("method").none() = nb::cast(StackMethod::AUTO), + "Return an iterable of Thread objects from a core file"); + + m.def("_check_interpreter_shutdown", + &_check_interpreter_shutdown, + "manager"_a, + "Check interpreter shutdown status and log appropriately"); + + // intercept_runtime_errors decorator - re-export from pystack.errors + nb::module_ pystack_errors = nb::module_::import_("pystack.errors"); + m.attr("intercept_runtime_errors") = pystack_errors.attr("intercept_runtime_errors"); +} diff --git a/src/pystack/_pystack/corefile.pxd b/src/pystack/_pystack/corefile.pxd deleted file mode 100644 index 629f77af..00000000 --- a/src/pystack/_pystack/corefile.pxd +++ /dev/null @@ -1,55 +0,0 @@ -from posix.types cimport pid_t - -from _pystack.elf_common cimport CoreFileAnalyzer -from _pystack.mem cimport SimpleVirtualMap -from libc.stdint cimport uintptr_t -from libcpp.memory cimport shared_ptr -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "corefile.h" namespace "pystack": - struct CoreCrashInfo: - int si_signo - int si_errno - int si_code - int sender_pid - int sender_uid - uintptr_t failed_addr - - struct CorePsInfo: - char state - char sname - char zomb - char nice - unsigned long flag - int uid - int gid - pid_t pid - pid_t ppid - pid_t pgrp - pid_t sid - char fname[16] - char psargs[80] - - struct CoreVirtualMap: - uintptr_t start - uintptr_t end - unsigned long filesize - cppstring flags - unsigned long offset - cppstring device - unsigned long inode - cppstring path - cppstring buildid - - cdef cppclass CoreFileExtractor: - CoreFileExtractor(shared_ptr[CoreFileAnalyzer] analyzer) except+ - int Pid() except+ - vector[CoreVirtualMap] MemoryMaps() except+ - vector[SimpleVirtualMap] ModuleInformation() except+ - cppstring extractExecutable() except+ - CoreCrashInfo 
extractFailureInfo() except+ - CorePsInfo extractPSInfo() except+ - vector[cppstring] missingModules() except+ - vector[CoreVirtualMap] extractMappedFiles() except+ diff --git a/src/pystack/_pystack/elf_common.pxd b/src/pystack/_pystack/elf_common.pxd deleted file mode 100644 index 233ee822..00000000 --- a/src/pystack/_pystack/elf_common.pxd +++ /dev/null @@ -1,22 +0,0 @@ -from libc.stdint cimport uintptr_t -from libcpp.string cimport string as cppstring - - -cdef extern from "elf_common.h" namespace "pystack": - cdef cppclass ProcessAnalyzer: - ProcessAnalyzer(int pid) except+ - - cdef cppclass CoreFileAnalyzer: - CoreFileAnalyzer(cppstring filename) except+ - CoreFileAnalyzer(cppstring filename, cppstring executable) except+ - CoreFileAnalyzer(cppstring filename, cppstring executable, cppstring lib_search_path) except+ - - struct SectionInfo: - cppstring name - cppstring flags - uintptr_t addr - uintptr_t corrected_addr - size_t offset - size_t size - - int getSectionInfo(const cppstring& filename, const cppstring& section_name, SectionInfo* result) except+ diff --git a/src/pystack/_pystack/logging.cpp b/src/pystack/_pystack/logging.cpp index 4ac13a73..b0a3cb74 100644 --- a/src/pystack/_pystack/logging.cpp +++ b/src/pystack/_pystack/logging.cpp @@ -1,29 +1,101 @@ #include #include -#include "../_pystack_api.h" +#define PY_SSIZE_T_CLEAN +#include + #include "logging.h" namespace pystack { +static PyObject* g_logger = nullptr; static int LOGGER_INITIALIZED = false; void initializePythonLoggerInterface() { - import_pystack___pystack(); + if (LOGGER_INITIALIZED) { + return; + } + + // Import the logging module and get a logger + PyObject* logging_module = PyImport_ImportModule("logging"); + if (!logging_module) { + PyErr_Print(); + throw std::runtime_error("Failed to import logging module"); + } + + PyObject* getLogger = PyObject_GetAttrString(logging_module, "getLogger"); + if (!getLogger) { + Py_DECREF(logging_module); + PyErr_Print(); + throw 
std::runtime_error("Failed to get logging.getLogger"); + } + + // Get logger for pystack._pystack + PyObject* logger_name = PyUnicode_FromString("pystack._pystack"); + g_logger = PyObject_CallFunctionObjArgs(getLogger, logger_name, NULL); + Py_DECREF(logger_name); + Py_DECREF(getLogger); + Py_DECREF(logging_module); + + if (!g_logger) { + PyErr_Print(); + throw std::runtime_error("Failed to create logger"); + } + LOGGER_INITIALIZED = true; } void logWithPython(const std::string& message, int level) { - if (!LOGGER_INITIALIZED) { - throw std::runtime_error("Logger is not initialized"); + if (!LOGGER_INITIALIZED || !g_logger) { + return; + } + + if (PyErr_Occurred()) { + return; + } + + // Get the log method name based on level + const char* method_name; + switch (level) { + case DEBUG: + method_name = "debug"; + break; + case INFO: + method_name = "info"; + break; + case WARNING: + method_name = "warning"; + break; + case ERROR: + method_name = "error"; + break; + case CRITICAL: + method_name = "critical"; + break; + default: + method_name = "info"; + break; } - if (!PyErr_Occurred()) { - log_with_python(&message, level); + + // Call the log method + PyObject* py_message = PyUnicode_FromString(message.c_str()); + if (!py_message) { + PyErr_Clear(); + return; + } + + PyObject* result = PyObject_CallMethod(g_logger, method_name, "O", py_message); + Py_DECREF(py_message); + + if (!result) { + PyErr_Clear(); + return; } + Py_DECREF(result); } } // namespace pystack diff --git a/src/pystack/_pystack/logging.pxd b/src/pystack/_pystack/logging.pxd deleted file mode 100644 index 5be77567..00000000 --- a/src/pystack/_pystack/logging.pxd +++ /dev/null @@ -1,2 +0,0 @@ -cdef extern from "logging.h" namespace "pystack": - void initializePythonLoggerInterface() diff --git a/src/pystack/_pystack/maps_parser.cpp b/src/pystack/_pystack/maps_parser.cpp new file mode 100644 index 00000000..feeb1fd8 --- /dev/null +++ b/src/pystack/_pystack/maps_parser.cpp @@ -0,0 +1,357 @@ +#include 
"maps_parser.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "logging.h" + +namespace pystack { + +namespace fs = std::filesystem; + +// Regex pattern for parsing /proc/pid/maps lines +// Format: start-end permissions offset dev inode pathname +static const std::regex MAPS_REGEXP( + R"(([0-9a-f]+)-([0-9a-f]+)\s+(.{4})\s+([0-9a-f]+)\s+([0-9a-f]+:[0-9a-f]+)\s+(\d+)\s*(.*)?)"); + +std::vector +parseProcMaps(pid_t pid) +{ + std::vector maps; + std::string maps_path = "/proc/" + std::to_string(pid) + "/maps"; + + std::ifstream maps_file(maps_path); + if (!maps_file.is_open()) { + throw std::runtime_error("No such process id: " + std::to_string(pid)); + } + + std::string line; + while (std::getline(maps_file, line)) { + std::smatch match; + if (!std::regex_match(line, match, MAPS_REGEXP)) { + LOG(DEBUG) << "Line cannot be recognized: " << line; + continue; + } + + uintptr_t start = std::stoull(match[1].str(), nullptr, 16); + uintptr_t end = std::stoull(match[2].str(), nullptr, 16); + std::string permissions = match[3].str(); + unsigned long offset = std::stoul(match[4].str(), nullptr, 16); + std::string device = match[5].str(); + unsigned long inode = std::stoul(match[6].str()); + std::string pathname = match[7].str(); + + size_t start_pos = pathname.find_first_not_of(" \t"); + if (start_pos != std::string::npos) { + pathname = pathname.substr(start_pos); + } else { + pathname = ""; + } + + maps.emplace_back( + start, + end, + end - start, // filesize + permissions, + offset, + device, + inode, + pathname); + } + + return maps; +} + +std::vector +parseCoreFileMaps( + const std::vector& mapped_files, + const std::vector& memory_maps) +{ + std::set> memory_map_ranges; + for (const auto& map : memory_maps) { + memory_map_ranges.insert({map.start, map.end}); + } + + std::vector missing_mapped_files; + for (const auto& map : mapped_files) { + if (memory_map_ranges.find({map.start, map.end}) == 
memory_map_ranges.end()) { + missing_mapped_files.push_back(map); + } + } + + std::vector all_maps; + all_maps.reserve(memory_maps.size() + missing_mapped_files.size()); + all_maps.insert(all_maps.end(), memory_maps.begin(), memory_maps.end()); + all_maps.insert(all_maps.end(), missing_mapped_files.begin(), missing_mapped_files.end()); + + std::sort(all_maps.begin(), all_maps.end(), [](const CoreVirtualMap& a, const CoreVirtualMap& b) { + return a.start < b.start; + }); + + std::set missing_map_paths; + for (const auto& map : missing_mapped_files) { + if (!map.path.empty()) { + try { + missing_map_paths.insert(fs::canonical(map.path).string()); + } catch (...) { + missing_map_paths.insert(map.path); + } + } + } + + std::unordered_map file_maps; + for (const auto& map : memory_maps) { + if (map.path.empty()) { + continue; + } + try { + std::string resolved_path = fs::canonical(map.path).string(); + if (missing_map_paths.count(resolved_path)) { + file_maps[resolved_path] = map.path; + } + } catch (...) 
{ + // Ignore errors resolving paths + } + } + + std::vector result; + result.reserve(all_maps.size()); + for (const auto& elem : all_maps) { + std::string path = elem.path; + if (!path.empty()) { + auto it = file_maps.find(path); + if (it != file_maps.end()) { + path = it->second; + } + } + result.emplace_back( + elem.start, + elem.end, + elem.filesize, + elem.flags, + elem.offset, + elem.device, + elem.inode, + path); + } + + return result; +} + +static VirtualMap +getBaseMap(const std::vector& binary_maps) +{ + for (const auto& map : binary_maps) { + if (!map.Path().empty()) { + return map; + } + } + if (!binary_maps.empty()) { + return binary_maps[0]; + } + throw std::runtime_error("No maps available"); +} + +static std::optional +getBss(const std::vector& elf_maps, uintptr_t load_point) +{ + if (elf_maps.empty()) { + return std::nullopt; + } + + VirtualMap binary_map = getBaseMap(elf_maps); + if (binary_map.Path().empty()) { + return std::nullopt; + } + + SectionInfo bss_info; + if (!getSectionInfo(binary_map.Path(), ".bss", &bss_info)) { + return std::nullopt; + } + + uintptr_t start = load_point + bss_info.corrected_addr; + LOG(INFO) << "Determined exact addr of .bss section: " << std::hex << start << " (" << load_point + << " + " << bss_info.corrected_addr << ")" << std::dec; + + unsigned long offset = 0; + + const VirtualMap* first_matching_map = nullptr; + for (const auto& map : elf_maps) { + if (map.containsAddr(start)) { + first_matching_map = ↦ + break; + } + } + + if (!first_matching_map) { + return std::nullopt; + } + + offset = first_matching_map->Offset() + (start - first_matching_map->Start()); + + return VirtualMap( + start, + start + bss_info.size, + bss_info.size, + "", // flags + offset, // offset + "", // device + 0, // inode + ""); // path +} + +ProcessMemoryMapInfo +parseMapInformation( + const std::string& binary, + const std::vector& maps, + const std::unordered_map* load_point_by_module) +{ + std::unordered_map> maps_by_library; + 
std::string current_lib; + + std::unordered_map computed_load_points; + if (!load_point_by_module) { + for (const auto& map : maps) { + if (!map.Path().empty()) { + std::string name = fs::path(map.Path()).filename().string(); + if (computed_load_points.find(name) == computed_load_points.end()) { + computed_load_points[name] = map.Start(); + } else { + computed_load_points[name] = std::min(computed_load_points[name], map.Start()); + } + } + } + load_point_by_module = &computed_load_points; + } + + for (const auto& memory_range : maps) { + std::string path_name; + if (!memory_range.Path().empty()) { + path_name = fs::path(memory_range.Path()).filename().string(); + current_lib = path_name; + } else { + path_name = current_lib; + } + maps_by_library[path_name].push_back(memory_range); + } + + std::string binary_name = fs::path(binary).filename().string(); + + auto python_it = maps_by_library.find(binary_name); + if (python_it == maps_by_library.end()) { + // Construct error message with available maps + std::ostringstream available; + for (const auto& map : maps) { + if (!map.Path().empty() && map.Path().find(".so") == std::string::npos) { + available << map.Path() << ", "; + } + } + std::string available_str = available.str(); + if (available_str.length() >= 2) { + available_str = available_str.substr(0, available_str.length() - 2); + } + throw std::runtime_error( + "Unable to find maps for the executable " + binary + + ". 
Available executable maps: " + available_str); + } + + const std::vector& binary_maps = python_it->second; + VirtualMap python = getBaseMap(binary_maps); + LOG(INFO) << "python binary first map found: " << python.Path(); + + std::optional libpython; + const std::vector* elf_maps = nullptr; + std::string libpython_name; + + std::vector libpython_binaries; + for (const auto& [lib_name, _] : maps_by_library) { + if (lib_name.find("libpython") != std::string::npos) { + libpython_binaries.push_back(lib_name); + } + } + + uintptr_t load_point = 0; + if (libpython_binaries.size() > 1) { + throw std::runtime_error( + "Unexpectedly found multiple libpython in process: " + + std::to_string(libpython_binaries.size())); + } else if (libpython_binaries.size() == 1) { + libpython_name = libpython_binaries[0]; + const auto& libpython_maps = maps_by_library[libpython_name]; + elf_maps = &libpython_maps; + auto load_it = load_point_by_module->find(libpython_name); + load_point = (load_it != load_point_by_module->end()) ? load_it->second : UINTPTR_MAX; + libpython = getBaseMap(libpython_maps); + LOG(INFO) << libpython_name << " first map found: " << libpython->Path(); + } else { + LOG(INFO) << "Process does not have a libpython.so, reading from binary"; + elf_maps = &binary_maps; + auto load_it = load_point_by_module->find(binary_name); + load_point = (load_it != load_point_by_module->end()) ? 
load_it->second : UINTPTR_MAX; + } + + std::optional heap; + auto heap_it = maps_by_library.find("[heap]"); + if (heap_it != maps_by_library.end() && !heap_it->second.empty()) { + heap = heap_it->second.front(); + LOG(INFO) << "Heap map found"; + } + + std::optional bss = getBss(*elf_maps, load_point); + if (!bss) { + for (const auto& map : *elf_maps) { + if (map.Path().empty() && map.Flags().find('r') != std::string::npos) { + bss = map; + break; + } + } + } + if (bss) { + LOG(INFO) << "bss map found"; + } + + return ProcessMemoryMapInfo{heap, bss, python, libpython}; +} + +ProcessMemoryMapInfo +parseMapInformationForProcess(pid_t pid, const std::vector& maps) +{ + std::string exe_link = "/proc/" + std::to_string(pid) + "/exe"; + char exe_path[PATH_MAX]; + ssize_t len = readlink(exe_link.c_str(), exe_path, sizeof(exe_path) - 1); + if (len == -1) { + throw std::runtime_error("Failed to read /proc/" + std::to_string(pid) + "/exe"); + } + exe_path[len] = '\0'; + return parseMapInformation(exe_path, maps); +} + +std::optional +getThreadName(pid_t pid, pid_t tid) +{ + std::string comm_path = "/proc/" + std::to_string(pid) + "/task/" + std::to_string(tid) + "/comm"; + std::ifstream comm_file(comm_path); + if (!comm_file.is_open()) { + return std::nullopt; + } + + std::string name; + std::getline(comm_file, name); + + size_t end = name.find_last_not_of(" \t\n\r"); + if (end != std::string::npos) { + name = name.substr(0, end + 1); + } + + return name; +} + +} // namespace pystack diff --git a/src/pystack/_pystack/maps_parser.h b/src/pystack/_pystack/maps_parser.h new file mode 100644 index 00000000..9678e9ce --- /dev/null +++ b/src/pystack/_pystack/maps_parser.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "corefile.h" +#include "elf_common.h" +#include "mem.h" + +namespace pystack { + +struct ProcessMemoryMapInfo +{ + std::optional heap; + std::optional bss; + VirtualMap python; + std::optional libpython; +}; + 
+std::vector +parseProcMaps(pid_t pid); + +std::vector +parseCoreFileMaps( + const std::vector& mapped_files, + const std::vector& memory_maps); + +ProcessMemoryMapInfo +parseMapInformation( + const std::string& binary, + const std::vector& maps, + const std::unordered_map* load_point_by_module = nullptr); + +ProcessMemoryMapInfo +parseMapInformationForProcess(pid_t pid, const std::vector& maps); + +std::optional +getThreadName(pid_t pid, pid_t tid); + +} // namespace pystack diff --git a/src/pystack/_pystack/mem.cpp b/src/pystack/_pystack/mem.cpp index 825f3c62..44c6aac6 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -113,49 +113,6 @@ VirtualMap::Size() const return d_end - d_start; } -MemoryMapInformation::MemoryMapInformation() -: d_main_map(std::nullopt) -, d_bss(std::nullopt) -, d_heap(std::nullopt) -{ -} - -const std::optional& -MemoryMapInformation::MainMap() -{ - return d_main_map; -} - -const std::optional& -MemoryMapInformation::Bss() -{ - return d_bss; -} - -const std::optional& -MemoryMapInformation::Heap() -{ - return d_heap; -} - -void -MemoryMapInformation::setMainMap(const VirtualMap& main_map) -{ - d_main_map = main_map; -} - -void -MemoryMapInformation::setBss(const VirtualMap& bss) -{ - d_bss = bss; -} - -void -MemoryMapInformation::setHeap(const VirtualMap& heap) -{ - d_heap = heap; -} - LRUCache::LRUCache(size_t capacity) : d_cache_capacity(capacity) , d_size(0){}; diff --git a/src/pystack/_pystack/mem.h b/src/pystack/_pystack/mem.h index b78b6674..64409ebc 100644 --- a/src/pystack/_pystack/mem.h +++ b/src/pystack/_pystack/mem.h @@ -81,6 +81,27 @@ class VirtualMap // Methods bool containsAddr(remote_addr_t addr) const; + // Permission helpers + bool isExecutable() const + { + return d_flags.find('x') != std::string::npos; + } + + bool isReadable() const + { + return d_flags.find('r') != std::string::npos; + } + + bool isWritable() const + { + return d_flags.find('w') != std::string::npos; + } + + bool 
isPrivate() const + { + return d_flags.find('p') != std::string::npos; + } + private: // Data members uintptr_t d_start{}; @@ -93,29 +114,6 @@ class VirtualMap std::string d_path{}; }; -class MemoryMapInformation -{ - public: - MemoryMapInformation(); - - // Getters - const std::optional& MainMap(); - const std::optional& Bss(); - const std::optional& Heap(); - - // Setters - - void setMainMap(const VirtualMap& main_map); - void setBss(const VirtualMap& bss); - void setHeap(const VirtualMap& heap); - - private: - // Data members - std::optional d_main_map; - std::optional d_bss; - std::optional d_heap; -}; - class LRUCache { private: diff --git a/src/pystack/_pystack/mem.pxd b/src/pystack/_pystack/mem.pxd deleted file mode 100644 index 02cfcce3..00000000 --- a/src/pystack/_pystack/mem.pxd +++ /dev/null @@ -1,33 +0,0 @@ -from libc.stdint cimport uintptr_t -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "mem.h" namespace "pystack": - ctypedef uintptr_t remote_addr_t - - cdef cppclass AbstractRemoteMemoryManager: - ssize_t copyMemoryFromProcess(remote_addr_t addr, size_t size, void *destination) except+ - - cdef cppclass ProcessMemoryManager(AbstractRemoteMemoryManager): - ProcessMemoryManager(int pid) except+ - ssize_t copyMemoryFromProcess(remote_addr_t addr, size_t size, void *destination) except+ - - - struct SimpleVirtualMap: - uintptr_t start - uintptr_t end - cppstring filename - cppstring buildid - - cdef cppclass VirtualMap: - VirtualMap() - VirtualMap(uintptr_t start, uintptr_t end, unsigned long filesize, - cppstring flags, unsigned long offset, cppstring permissions, - unsigned long inode, cppstring pathname) - - cdef cppclass MemoryMapInformation: - MemoryMapInformation() - void setMainMap(const VirtualMap& bss) - void setBss(const VirtualMap& bss) - void setHeap(const VirtualMap& heap) diff --git a/src/pystack/_pystack/native_frame.pxd b/src/pystack/_pystack/native_frame.pxd deleted file mode 
100644 index 4aa13188..00000000 --- a/src/pystack/_pystack/native_frame.pxd +++ /dev/null @@ -1,11 +0,0 @@ -from libcpp.string cimport string as cppstring - - -cdef extern from "native_frame.h" namespace "pystack": - struct NativeFrame: - unsigned long address - cppstring symbol - cppstring path - int linenumber - int colnumber - cppstring library diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 52ec0099..fcf62e45 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -13,6 +14,7 @@ #include "corefile.h" #include "logging.h" +#include "maps_parser.h" #include "mem.h" #include "native_frame.h" #include "process.h" @@ -22,6 +24,7 @@ #include "pythread.h" #include "pytypes.h" #include "version.h" +#include "version_detector.h" namespace { @@ -63,6 +66,19 @@ class DirectoryReader } // namespace namespace pystack { +namespace fs = std::filesystem; + +namespace { + +// Helper to extract the main interpreter map from ProcessMemoryMapInfo +std::optional +getMainMap(const ProcessMemoryMapInfo& map_info) +{ + return map_info.libpython ? 
*map_info.libpython : map_info.python; +} + +} // namespace + namespace { // unnamed struct ParsedPyVersion @@ -213,21 +229,35 @@ ProcessTracer::getTids() const AbstractProcessManager::AbstractProcessManager( pid_t pid, std::vector&& memory_maps, - MemoryMapInformation&& map_info) + std::optional main_map, + std::optional bss, + std::optional heap) : d_pid(pid) -, d_memory_maps(memory_maps) +, d_main_map(std::move(main_map)) +, d_bss(std::move(bss)) +, d_heap(std::move(heap)) +, d_memory_maps(std::move(memory_maps)) , d_manager(nullptr) , d_unwinder(nullptr) , d_analyzer(nullptr) { - d_main_map = map_info.MainMap(); - d_bss = map_info.Bss(); - d_heap = map_info.Heap(); if (!d_main_map) { throw std::runtime_error("The main interpreter map could not be located"); } } +const std::vector& +AbstractProcessManager::MemoryMaps() const +{ + return d_memory_maps; +} + +std::pair +AbstractProcessManager::Version() const +{ + return std::make_pair(d_major, d_minor); +} + bool AbstractProcessManager::isValidDictionaryObject(remote_addr_t addr) const { @@ -1361,17 +1391,49 @@ AbstractProcessManager::findInterpreterStateFromDebugOffsets() const return 0; } +std::shared_ptr +ProcessManager::create(pid_t pid, bool stop_process) +{ + std::shared_ptr tracer; + if (stop_process) { + tracer = std::make_shared(pid); + } + + auto virtual_maps = parseProcMaps(pid); + auto map_info = parseMapInformationForProcess(pid, virtual_maps); + auto analyzer = std::make_shared(pid); + + auto manager = std::make_shared( + pid, + tracer, + analyzer, + std::move(virtual_maps), + getMainMap(map_info), + map_info.bss, + map_info.heap); + + manager->initializeVersion(pid, map_info); + return manager; +} + ProcessManager::ProcessManager( pid_t pid, const std::shared_ptr& tracer, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info) -: AbstractProcessManager(pid, std::move(memory_maps), std::move(map_info)) -, tracer(tracer) + std::optional main_map, + std::optional 
bss, + std::optional heap) +: AbstractProcessManager( + pid, + std::move(memory_maps), + std::move(main_map), + std::move(bss), + std::move(heap)) +, d_tracer(tracer) { - if (tracer) { - d_tids = tracer->getTids(); + if (d_tracer) { + d_tids = d_tracer->getTids(); } else { d_tids = getProcessTids(pid); } @@ -1380,25 +1442,103 @@ ProcessManager::ProcessManager( d_unwinder = std::make_unique(analyzer); } +void +ProcessManager::initializeVersion(pid_t pid, const ProcessMemoryMapInfo& map_info) +{ + // Try to get version from debug offsets first + setPythonVersionFromDebugOffsets(); + auto python_version = findPythonVersion(); + + // Fallback to external version detection if needed + if (python_version.first == -1 && python_version.second == -1) { + python_version = getVersionForProcess(pid, map_info, d_manager.get()); + } + + setPythonVersion(python_version); +} + const std::vector& ProcessManager::Tids() const { return d_tids; } +std::shared_ptr +CoreFileProcessManager::create( + const std::string& core_file, + const std::string& executable, + const std::optional& lib_search_path) +{ + std::shared_ptr analyzer; + if (lib_search_path) { + analyzer = std::make_shared(core_file, executable, *lib_search_path); + } else { + analyzer = std::make_shared(core_file, executable); + } + + auto extractor = std::make_unique(analyzer); + + auto mapped_files = extractor->extractMappedFiles(); + auto memory_maps = extractor->MemoryMaps(); + + std::unordered_map load_point_by_module; + for (const auto& mod : extractor->ModuleInformation()) { + auto name = fs::path(mod.filename).filename().string(); + load_point_by_module[name] = mod.start; + } + + auto virtual_maps = parseCoreFileMaps(mapped_files, memory_maps); + pid_t pid = extractor->Pid(); + auto map_info = parseMapInformation(executable, virtual_maps, &load_point_by_module); + + auto manager = std::make_shared( + pid, + analyzer, + std::move(virtual_maps), + getMainMap(map_info), + map_info.bss, + map_info.heap); + + 
manager->initializeVersion(core_file, map_info); + return manager; +} + CoreFileProcessManager::CoreFileProcessManager( pid_t pid, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info) -: AbstractProcessManager(pid, std::move(memory_maps), std::move(map_info)) + std::optional main_map, + std::optional bss, + std::optional heap) +: AbstractProcessManager( + pid, + std::move(memory_maps), + std::move(main_map), + std::move(bss), + std::move(heap)) { d_analyzer = analyzer; d_manager = std::make_unique(analyzer, d_memory_maps); - d_executable = analyzer->d_executable; - std::unique_ptr the_unwinder = std::make_unique(analyzer); - d_tids = the_unwinder->getCoreTids(); - d_unwinder = std::move(the_unwinder); + auto unwinder = std::make_unique(analyzer); + d_tids = unwinder->getCoreTids(); + d_unwinder = std::move(unwinder); +} + +void +CoreFileProcessManager::initializeVersion( + const std::string& core_file, + const ProcessMemoryMapInfo& map_info) +{ + // Try to get version from debug offsets first + setPythonVersionFromDebugOffsets(); + auto python_version = findPythonVersion(); + + // Fallback to external version detection if needed + if (python_version.first == -1 && python_version.second == -1) { + python_version = getVersionForCore(core_file, map_info); + } + + setPythonVersion(python_version); } const std::vector& diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index e8554b02..0f52ff3c 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include #include @@ -12,6 +10,7 @@ #include #include "elf_common.h" +#include "maps_parser.h" #include "mem.h" #include "native_frame.h" #include "pycompat.h" @@ -63,11 +62,15 @@ class AbstractProcessManager : public std::enable_shared_from_this&& memory_maps, - MemoryMapInformation&& map_info); + std::optional main_map, + std::optional bss, + std::optional heap); // Getters pid_t Pid() 
const; virtual const std::vector& Tids() const = 0; + const std::vector& MemoryMaps() const; + std::pair Version() const; remote_addr_t getAddressFromCache(const std::string& symbol) const; void registerAddressInCache(const std::string& symbol, remote_addr_t address) const; @@ -144,13 +147,18 @@ AbstractProcessManager::copyObjectFromProcess(remote_addr_t addr, T* destination class ProcessManager : public AbstractProcessManager { public: + // Factory method + static std::shared_ptr create(pid_t pid, bool stop_process = true); + // Constructors ProcessManager( pid_t pid, const std::shared_ptr& tracer, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info); + std::optional main_map, + std::optional bss, + std::optional heap); // Destructors virtual ~ProcessManager() = default; @@ -160,19 +168,30 @@ class ProcessManager : public AbstractProcessManager private: // Data members - std::shared_ptr tracer; + std::shared_ptr d_tracer; std::vector d_tids; + + // Methods + void initializeVersion(pid_t pid, const ProcessMemoryMapInfo& map_info); }; class CoreFileProcessManager : public AbstractProcessManager { public: + // Factory method + static std::shared_ptr + create(const std::string& core_file, + const std::string& executable, + const std::optional& lib_search_path = std::nullopt); + // Constructors CoreFileProcessManager( pid_t pid, const std::shared_ptr& analyzer, std::vector memory_maps, - MemoryMapInformation map_info); + std::optional main_map, + std::optional bss, + std::optional heap); // Destructors virtual ~CoreFileProcessManager() = default; @@ -183,6 +202,8 @@ class CoreFileProcessManager : public AbstractProcessManager private: // Data members std::vector d_tids; - std::optional d_executable; + + // Methods + void initializeVersion(const std::string& core_file, const ProcessMemoryMapInfo& map_info); }; } // namespace pystack diff --git a/src/pystack/_pystack/process.pxd b/src/pystack/_pystack/process.pxd deleted file mode 
100644 index ede12d58..00000000 --- a/src/pystack/_pystack/process.pxd +++ /dev/null @@ -1,41 +0,0 @@ -from _pystack.elf_common cimport CoreFileAnalyzer -from _pystack.elf_common cimport ProcessAnalyzer -from _pystack.mem cimport MemoryMapInformation -from _pystack.mem cimport VirtualMap -from _pystack.mem cimport remote_addr_t -from libc.stdint cimport uintptr_t -from libcpp.memory cimport shared_ptr -from libcpp.string cimport string as cppstring -from libcpp.utility cimport pair -from libcpp.vector cimport vector - - -cdef extern from "process.h" namespace "pystack::AbstractProcessManager": - cdef enum InterpreterStatus: - RUNNING - FINALIZED - UNKNOWN - -cdef extern from "process.h" namespace "pystack": - cdef cppclass ProcessTracer: - pass - - cdef cppclass AbstractProcessManager: - remote_addr_t scanBSS() except+ - remote_addr_t scanHeap() except+ - remote_addr_t scanAllAnonymousMaps() except+ - remote_addr_t findInterpreterStateFromDebugOffsets() except+ - remote_addr_t findInterpreterStateFromSymbols() except+ - remote_addr_t findInterpreterStateFromElfData() except+ - ssize_t copyMemoryFromProcess(remote_addr_t addr, ssize_t size, void *destination) except+ - vector[int] Tids() except+ - InterpreterStatus isInterpreterActive() except+ - pair[int, int] findPythonVersion() - void setPythonVersion(pair[int, int] version) except + - void setPythonVersionFromDebugOffsets() except + - - cdef cppclass ProcessManager(AbstractProcessManager): - ProcessManager(int pid, shared_ptr[ProcessTracer] tracer, shared_ptr[ProcessAnalyzer] analyzer, vector[VirtualMap] memory_maps, MemoryMapInformation map_info) except+ - - cdef cppclass CoreFileProcessManager(AbstractProcessManager): - CoreFileProcessManager(int pid, shared_ptr[CoreFileAnalyzer] analyzer, vector[VirtualMap] memory_maps, MemoryMapInformation map_info) except+ diff --git a/src/pystack/_pystack/pycode.pxd b/src/pystack/_pystack/pycode.pxd deleted file mode 100644 index 2596f87f..00000000 --- 
a/src/pystack/_pystack/pycode.pxd +++ /dev/null @@ -1,17 +0,0 @@ -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "pycode.h" namespace "pystack": - cdef struct LocationInfo: - int lineno - int end_lineno - int column - int end_column - - cdef cppclass CodeObject: - cppstring Filename() - cppstring Scope() - LocationInfo Location() - int NArguments() - const vector[cppstring] Varnames() diff --git a/src/pystack/_pystack/pyframe.cpp b/src/pystack/_pystack/pyframe.cpp index 62769529..70037b1c 100644 --- a/src/pystack/_pystack/pyframe.cpp +++ b/src/pystack/_pystack/pyframe.cpp @@ -19,6 +19,7 @@ FrameObject::FrameObject( { LOG(DEBUG) << "Copying frame number " << frame_no; LOG(DEBUG) << std::hex << std::showbase << "Copying frame struct from address " << addr; + Structure frame(manager, addr); d_addr = addr; @@ -36,7 +37,15 @@ FrameObject::FrameObject( auto prev_addr = frame.getField(&py_frame_v::o_back); LOG(DEBUG) << std::hex << std::showbase << "Previous frame address: " << prev_addr; if (prev_addr) { - d_prev = std::make_shared(manager, prev_addr, next_frame_no); + try { + d_prev = std::make_shared(manager, prev_addr, next_frame_no); + } catch (const RemoteMemCopyError& ex) { + // The previous frame address points to unreadable memory (e.g., guard page, + // unmapped region). Treat this as the end of the frame chain. 
+ LOG(DEBUG) << "Failed to read previous frame at " << std::hex << std::showbase << prev_addr + << ", treating as end of frame chain: " << ex.what(); + d_prev = nullptr; + } } d_is_entry = isEntry(manager, frame); } diff --git a/src/pystack/_pystack/pyframe.pxd b/src/pystack/_pystack/pyframe.pxd deleted file mode 100644 index 7cad98e7..00000000 --- a/src/pystack/_pystack/pyframe.pxd +++ /dev/null @@ -1,19 +0,0 @@ -from _pystack.pycode cimport CodeObject -from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string as cppstring -from libcpp.unordered_map cimport unordered_map - - -cdef extern from "pyframe.h" namespace "pystack": - - cdef cppclass FrameObject: - ssize_t FrameNo() - unique_ptr[FrameObject] PreviousFrame() - unique_ptr[CodeObject] Code() - unordered_map[cppstring, cppstring] Arguments() - unordered_map[cppstring, cppstring] Locals() - bool IsEntryFrame() - bool IsShim() - - void resolveLocalVariables() except+ diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index d50e4126..3607a791 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -71,7 +71,11 @@ findPthreadTidOffset( offsetof(_pthread_structure_with_simple_header, tid), offsetof(_pthread_structure_with_tcbhead, tid)}; for (off_t candidate : glibc_pthread_offset_candidates) { - manager->copyObjectFromProcess((remote_addr_t)(pthread_id_addr + candidate), &the_tid); + try { + manager->copyObjectFromProcess((remote_addr_t)(pthread_id_addr + candidate), &the_tid); + } catch (const RemoteMemCopyError& ex) { + continue; + } if (the_tid == manager->Pid()) { LOG(DEBUG) << "Tid offset located using GLIBC offsets at offset " << std::showbase << std::hex << candidate << " in pthread structure"; diff --git a/src/pystack/_pystack/pythread.pxd b/src/pystack/_pystack/pythread.pxd deleted file mode 100644 index 3930a6de..00000000 --- a/src/pystack/_pystack/pythread.pxd +++ /dev/null @@ -1,37 +0,0 @@ -from 
_pystack.native_frame cimport NativeFrame -from _pystack.process cimport AbstractProcessManager -from _pystack.process cimport remote_addr_t -from _pystack.pyframe cimport FrameObject -from libcpp.memory cimport shared_ptr -from libcpp.string cimport string as cppstring -from libcpp.vector cimport vector - - -cdef extern from "pythread.h" namespace "pystack": - cdef cppclass NativeThread "pystack::Thread": - NativeThread(int, int) except+ - int Tid() - vector[NativeFrame]& NativeFrames() - void populateNativeStackTrace(shared_ptr[AbstractProcessManager] manager) except+ - -cdef extern from "pythread.h" namespace "pystack::PyThread": - cdef enum GilStatus: - UNKNOWN = -1 - NOT_HELD = 0 - HELD = 1 - - cdef enum GCStatus: - COLLECTING_UNKNOWN = -1 - NOT_COLLECTING = 0 - COLLECTING = 1 - -cdef extern from "pythread.h" namespace "pystack": - cdef cppclass Thread "pystack::PyThread": - int Tid() - shared_ptr[FrameObject] FirstFrame() - shared_ptr[Thread] NextThread() - vector[NativeFrame]& NativeFrames() - GilStatus isGilHolder() - GCStatus isGCCollecting() - void populateNativeStackTrace(shared_ptr[AbstractProcessManager] manager) except+ - shared_ptr[Thread] getThreadFromInterpreterState(shared_ptr[AbstractProcessManager] manager, remote_addr_t addr) except+ diff --git a/src/pystack/_pystack/thread_builder.cpp b/src/pystack/_pystack/thread_builder.cpp new file mode 100644 index 00000000..629e502a --- /dev/null +++ b/src/pystack/_pystack/thread_builder.cpp @@ -0,0 +1,193 @@ +#include "thread_builder.h" + +#include "logging.h" +#include "maps_parser.h" + +namespace pystack { + +// StackMethod flags (must match Python enum values) +enum StackMethodFlag { + METHOD_ELF_DATA = 1 << 0, + METHOD_SYMBOLS = 1 << 1, + METHOD_BSS = 1 << 2, + METHOD_ANONYMOUS_MAPS = 1 << 3, + METHOD_HEAP = 1 << 4, + METHOD_DEBUG_OFFSETS = 1 << 5, +}; + +std::vector +buildFrameStack(FrameObject* first_frame, bool resolve_locals) +{ + std::vector frames; + FrameObject* current_frame = first_frame; + 
+ while (current_frame != nullptr) { + auto code = current_frame->Code(); + // Skip frames without code (shim frames) or with unreadable code ("???") + if (!code || code->Filename() == "???") { + auto prev = current_frame->PreviousFrame(); + current_frame = prev.get(); + continue; + } + + if (resolve_locals) { + current_frame->resolveLocalVariables(); + } + + PyFrameData frame_data; + frame_data.code.filename = code->Filename(); + frame_data.code.scope = code->Scope(); + frame_data.code.location = code->Location(); + frame_data.arguments = current_frame->Arguments(); + frame_data.locals = current_frame->Locals(); + frame_data.is_entry = current_frame->IsEntryFrame(); + frame_data.is_shim = current_frame->IsShim(); + + frames.push_back(std::move(frame_data)); + + auto prev = current_frame->PreviousFrame(); + current_frame = prev.get(); + } + + return frames; +} + +PyThreadData +buildPythonThread( + const std::shared_ptr& manager, + PyThread* thread, + pid_t pid, + bool add_native_traces, + bool resolve_locals) +{ + PyThreadData data; + data.tid = thread->Tid(); + data.name = getThreadName(pid, thread->Tid()); + + LOG(INFO) << "Constructing new Python thread with tid " << data.tid; + + if (add_native_traces) { + thread->populateNativeStackTrace(manager); + } + + auto first_frame = thread->FirstFrame(); + if (first_frame) { + data.frames = buildFrameStack(first_frame.get(), resolve_locals); + } + + const auto& native_frames = thread->NativeFrames(); + data.native_frames.assign(native_frames.rbegin(), native_frames.rend()); + + data.gil_status = static_cast(thread->isGilHolder()); + data.gc_status = static_cast(thread->isGCCollecting()); + + return data; +} + +PyThreadData +buildNativeThread(const std::shared_ptr& manager, pid_t pid, pid_t tid) +{ + PyThreadData data; + data.tid = tid; + data.name = getThreadName(pid, tid); + data.gil_status = 0; // NOT_HELD + data.gc_status = 0; // NOT_COLLECTING + + LOG(INFO) << "Constructing new native thread with tid " << tid; + + 
Thread native_thread(pid, tid); + native_thread.populateNativeStackTrace(manager); + + const auto& native_frames = native_thread.NativeFrames(); + data.native_frames.assign(native_frames.rbegin(), native_frames.rend()); + + return data; +} + +std::vector +buildThreadsFromInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_head, + pid_t pid, + bool add_native_traces, + bool resolve_locals) +{ + LOG(INFO) << "Fetching Python threads"; + std::vector threads; + + auto thread = getThreadFromInterpreterState(manager, interpreter_head); + PyThread* current_thread = thread.get(); + + while (current_thread != nullptr) { + threads.push_back( + buildPythonThread(manager, current_thread, pid, add_native_traces, resolve_locals)); + + auto next = current_thread->NextThread(); + current_thread = next.get(); + } + + return threads; +} + +remote_addr_t +getInterpreterStateAddr(AbstractProcessManager* manager, int method_flags) +{ + remote_addr_t head = 0; + + struct MethodInfo + { + int flag; + const char* name; + std::function func; + }; + + std::vector methods = { + {METHOD_DEBUG_OFFSETS, + "using debug offsets data", + [&]() { return manager->findInterpreterStateFromDebugOffsets(); }}, + {METHOD_ELF_DATA, + "using ELF data", + [&]() { return manager->findInterpreterStateFromElfData(); }}, + {METHOD_SYMBOLS, + "using symbols", + [&]() { return manager->findInterpreterStateFromSymbols(); }}, + {METHOD_BSS, "scanning the BSS", [&]() { return manager->scanBSS(); }}, + {METHOD_ANONYMOUS_MAPS, + "scanning all anonymous maps", + [&]() { return manager->scanAllAnonymousMaps(); }}, + {METHOD_HEAP, "scanning the heap", [&]() { return manager->scanHeap(); }}, + }; + + for (const auto& method : methods) { + if ((method_flags & method.flag) == 0) { + continue; + } + + try { + head = method.func(); + } catch (const std::exception& exc) { + LOG(WARNING) << "Unexpected error finding PyInterpreterState by " << method.name << ": " + << exc.what(); + continue; + } + + if 
(head != 0) { + LOG(INFO) << "PyInterpreterState found by " << method.name << " at address 0x" << std::hex + << head << std::dec; + return head; + } else { + LOG(INFO) << "Address of PyInterpreterState not found by " << method.name; + } + } + + LOG(INFO) << "Address of PyInterpreterState could not be found"; + return 0; +} + +std::vector +getThreadIds(const std::shared_ptr& manager) +{ + return manager->Tids(); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/thread_builder.h b/src/pystack/_pystack/thread_builder.h new file mode 100644 index 00000000..ac431387 --- /dev/null +++ b/src/pystack/_pystack/thread_builder.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "maps_parser.h" +#include "native_frame.h" +#include "process.h" +#include "pycode.h" +#include "pyframe.h" +#include "pythread.h" + +namespace pystack { + +struct PyCodeData +{ + std::string filename; + std::string scope; + LocationInfo location; +}; + +struct PyFrameData +{ + PyCodeData code; + std::unordered_map arguments; + std::unordered_map locals; + bool is_entry; + bool is_shim; +}; + +struct PyThreadData +{ + int tid; + std::optional name; + std::vector frames; + std::vector native_frames; + int gil_status; // -1 = unknown, 0 = not held, 1 = held + int gc_status; // -1 = unknown, 0 = not collecting, 1 = collecting +}; + +std::vector +buildThreadsFromInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_head, + pid_t pid, + bool add_native_traces, + bool resolve_locals); + +PyThreadData +buildPythonThread( + const std::shared_ptr& manager, + PyThread* thread, + pid_t pid, + bool add_native_traces, + bool resolve_locals); + +PyThreadData +buildNativeThread(const std::shared_ptr& manager, pid_t pid, pid_t tid); + +std::vector +buildFrameStack(FrameObject* first_frame, bool resolve_locals); + +remote_addr_t +getInterpreterStateAddr(AbstractProcessManager* manager, int method_flags); + +std::vector +getThreadIds(const 
std::shared_ptr& manager); + +} // namespace pystack diff --git a/src/pystack/_pystack/unwinder.cpp b/src/pystack/_pystack/unwinder.cpp index 2804c128..abc284b1 100644 --- a/src/pystack/_pystack/unwinder.cpp +++ b/src/pystack/_pystack/unwinder.cpp @@ -82,10 +82,25 @@ frameCallback(Dwfl_Frame* state, void* arg) Dwarf_Addr pc; bool isActivation; if (!dwfl_frame_pc(state, &pc, &isActivation)) { - LOG(DEBUG) << "dwfl_frame_pc failed"; + int dwfl_err = dwfl_errno(); + LOG(DEBUG) << "dwfl_frame_pc failed: " << (dwfl_err ? dwfl_errmsg(dwfl_err) : "no error"); + LOG(DEBUG) << "Total frames gathered before failure: " << frames->size(); return -1; } + // Get additional register info for debugging + Dwarf_Word sp = 0; +#if defined(__x86_64__) + // x86_64 stack pointer is register 7 + dwfl_frame_reg(state, 7, &sp); +#elif defined(__aarch64__) + // aarch64 stack pointer is register 31 + dwfl_frame_reg(state, 31, &sp); +#endif + + LOG(DEBUG) << std::hex << std::showbase << "frameCallback: pc=" << pc << " sp=" << sp + << " isActivation=" << isActivation << " frame_count=" << std::dec << frames->size(); + std::optional stackPointer; // Unwinding through musl libc with elfutils can get stuck returning the // same PC in a loop forever. 
@@ -308,6 +323,8 @@ AbstractUnwinder::gatherFrames(const std::vector& frames) const if (!raw_symname) { LOG(DEBUG) << std::hex << std::showbase << "Non-inline symbol name could not be resolved @ " << pc; + // Add frame with unknown symbol rather than skipping it + native_frames.push_back({pc, "???", mod_name, 0, 0, mod_name}); continue; } @@ -481,24 +498,34 @@ thread_callback_for_frames(Dwfl_Thread* thread, void* arg) { auto* thread_arg = static_cast(arg); pid_t tid = dwfl_thread_tid(thread); + LOG(DEBUG) << "thread_callback_for_frames: checking thread tid=" << tid << " (looking for " + << thread_arg->tid << ")"; if (tid != thread_arg->tid) { return DWARF_CB_OK; } - switch (dwfl_thread_getframes(thread, frameCallback, (void*)(&(thread_arg->frames)))) { + LOG(DEBUG) << "thread_callback_for_frames: found matching thread, calling dwfl_thread_getframes"; + int result = dwfl_thread_getframes(thread, frameCallback, (void*)(&(thread_arg->frames))); + LOG(DEBUG) << "thread_callback_for_frames: dwfl_thread_getframes returned " << result << ", got " + << thread_arg->frames.size() << " frames"; + + switch (result) { case DWARF_CB_OK: case DWARF_CB_ABORT: break; - case -1: + case -1: { // This may or may not be an error, as it can signal the end of the stack // unwinding. + int dwfl_err = dwfl_errno(); + LOG(DEBUG) << "thread_callback_for_frames: dwfl error: " + << (dwfl_err ? dwfl_errmsg(dwfl_err) : "no error"); if (thread_arg->frames.empty()) { - int dwfl_err = dwfl_errno(); std::string error( dwfl_err ? 
dwfl_errmsg(dwfl_err) : "unwinding failed with no error reported"); throw UnwinderError("Unknown error happened when gathering thread frames: " + error); } break; + } default: throw UnwinderError("Unknown error happened when gathering thread frames"); } diff --git a/src/pystack/_pystack/version_detector.cpp b/src/pystack/_pystack/version_detector.cpp new file mode 100644 index 00000000..8db20299 --- /dev/null +++ b/src/pystack/_pystack/version_detector.cpp @@ -0,0 +1,165 @@ +#include "version_detector.h" + +#include +#include +#include +#include +#include + +#include "logging.h" + +namespace pystack { + +namespace fs = std::filesystem; + +// Regex patterns for version detection +// Matches: "3.8.10 (default, May 26 2023, 14:05:08)" or similar version strings in BSS +static const std::regex BSS_VERSION_REGEXP( + R"(((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?(?: (?:experimental )?free-threading build)? (\(.{1,64}\)))"); + +// Matches: python3.8, python3.10, etc. +static const std::regex BINARY_REGEXP(R"(python(\d+)\.(\d+).*)", std::regex_constants::icase); + +// Matches: libpython3.8.so, libpython3.10.so.1.0, etc. +static const std::regex LIBPYTHON_REGEXP(R"(.*libpython(\d+)\.(\d+).*)", std::regex_constants::icase); + +static std::optional +scanProcessBssForVersion(pid_t pid, const VirtualMap& bss, AbstractRemoteMemoryManager* manager) +{ + if (!manager) { + return std::nullopt; + } + + size_t size = bss.Size(); + std::vector memory(size); + + try { + ssize_t bytes_read = manager->copyMemoryFromProcess(bss.Start(), size, memory.data()); + if (bytes_read < 0) { + return std::nullopt; + } + } catch (...) 
{ + return std::nullopt; + } + + std::string memory_str(memory.begin(), memory.end()); + std::smatch match; + if (std::regex_search(memory_str, match, BSS_VERSION_REGEXP)) { + int major = std::stoi(match[2].str()); + int minor = std::stoi(match[3].str()); + return PythonVersion(major, minor); + } + + return std::nullopt; +} + +static std::optional +scanCoreBssForVersion(const std::string& corefile, const VirtualMap& bss) +{ + std::ifstream file(corefile, std::ios::binary); + if (!file.is_open()) { + return std::nullopt; + } + + file.seekg(bss.Offset()); + if (!file.good()) { + return std::nullopt; + } + + size_t size = bss.Size(); + std::vector data(size); + file.read(data.data(), size); + if (!file.good() && !file.eof()) { + return std::nullopt; + } + + std::string data_str(data.begin(), data.end()); + std::smatch match; + if (std::regex_search(data_str, match, BSS_VERSION_REGEXP)) { + int major = std::stoi(match[2].str()); + int minor = std::stoi(match[3].str()); + return PythonVersion(major, minor); + } + + return std::nullopt; +} + +static std::optional +inferVersionFromPath(const std::string& path) +{ + std::string filename = fs::path(path).filename().string(); + + std::smatch match; + if (std::regex_match(filename, match, LIBPYTHON_REGEXP)) { + int major = std::stoi(match[1].str()); + int minor = std::stoi(match[2].str()); + LOG(INFO) << "Version inferred from libpython path: " << major << "." << minor; + return PythonVersion(major, minor); + } + + if (std::regex_match(filename, match, BINARY_REGEXP)) { + int major = std::stoi(match[1].str()); + int minor = std::stoi(match[2].str()); + LOG(INFO) << "Version inferred from binary path: " << major << "." 
<< minor; + return PythonVersion(major, minor); + } + + return std::nullopt; +} + +static PythonVersion +getVersionFromMapInfo(const ProcessMemoryMapInfo& mapinfo) +{ + if (mapinfo.libpython && !mapinfo.libpython->Path().empty()) { + LOG(INFO) << "Trying to extract version from filename: " << mapinfo.libpython->Path(); + auto version = inferVersionFromPath(mapinfo.libpython->Path()); + if (version) { + return *version; + } + } + + if (!mapinfo.python.Path().empty()) { + LOG(INFO) << "Trying to extract version from filename: " << mapinfo.python.Path(); + auto version = inferVersionFromPath(mapinfo.python.Path()); + if (version) { + return *version; + } + } + + throw std::runtime_error("Could not determine python version from " + mapinfo.python.Path()); +} + +PythonVersion +getVersionForProcess( + pid_t pid, + const ProcessMemoryMapInfo& mapinfo, + AbstractRemoteMemoryManager* manager) +{ + if (mapinfo.bss) { + auto version = scanProcessBssForVersion(pid, *mapinfo.bss, manager); + if (version) { + LOG(INFO) << "Version found by scanning the bss section: " << version->first << "." + << version->second; + return *version; + } + } + + return getVersionFromMapInfo(mapinfo); +} + +PythonVersion +getVersionForCore(const std::string& corefile, const ProcessMemoryMapInfo& mapinfo) +{ + if (mapinfo.bss) { + auto version = scanCoreBssForVersion(corefile, *mapinfo.bss); + if (version) { + LOG(INFO) << "Version found by scanning the bss section: " << version->first << "." 
+ << version->second; + return *version; + } + } + + return getVersionFromMapInfo(mapinfo); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/version_detector.h b/src/pystack/_pystack/version_detector.h new file mode 100644 index 00000000..403b8675 --- /dev/null +++ b/src/pystack/_pystack/version_detector.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +#include "maps_parser.h" +#include "mem.h" + +namespace pystack { + +using PythonVersion = std::pair; + +PythonVersion +getVersionForProcess( + pid_t pid, + const ProcessMemoryMapInfo& mapinfo, + AbstractRemoteMemoryManager* manager); + +PythonVersion +getVersionForCore(const std::string& corefile, const ProcessMemoryMapInfo& mapinfo); + +} // namespace pystack diff --git a/src/pystack/errors.py b/src/pystack/errors.py index 5bbd58d3..4dd615cc 100644 --- a/src/pystack/errors.py +++ b/src/pystack/errors.py @@ -1,6 +1,11 @@ import pathlib +from functools import wraps from typing import Any +from typing import Callable from typing import Optional +from typing import TypeVar + +F = TypeVar("F", bound=Callable[..., Any]) DETECTED_EXECUTABLE_NOT_FOUND_TEXT = """\ The executable that was automatically located by pystack doesn't exist. @@ -128,3 +133,19 @@ class InvalidExecutable(PystackError): class MissingExecutableMaps(PystackError): HELP_TEXT = MISSING_EXECUTABLE_MAPS_HELP_TEXT + + +def intercept_runtime_errors() -> Callable[[F], F]: + """Decorator that converts RuntimeError to EngineError.""" + + def decorator(func: F) -> F: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return func(*args, **kwargs) + except RuntimeError as e: + raise EngineError(str(e)) from e + + return wrapper # type: ignore[return-value] + + return decorator diff --git a/src/pystack/maps.py b/src/pystack/maps.py index eda70bcc..a95a43de 100644 --- a/src/pystack/maps.py +++ b/src/pystack/maps.py @@ -1,46 +1,17 @@ -import collections +"""Memory map data classes for process analysis. 
+ +This module provides data classes for representing memory maps. +The actual parsing is done in C++. +""" import dataclasses -import logging -import os -import re from pathlib import Path -from typing import Any -from typing import Dict -from typing import Iterable -from typing import List from typing import Optional -from .errors import MissingExecutableMaps -from .errors import ProcessNotFound -from .errors import PystackError - -LOGGER = logging.getLogger(__file__) - -MAPS_REGEXP = re.compile( - r""" - (?P[\da-f]+) - - - (?P[\da-f]+) - \s - (?P....) - \s - (?P[\da-f]+) - \s - (?P[\da-f][\da-f]+:[\da-f][\da-f]+) - \s - (?P\d+) - \s* - (?P.+)? - $ - """, - re.VERBOSE, -) - -RawCoreMapList = List[Dict[str, Any]] - @dataclasses.dataclass(frozen=True, eq=True) class VirtualMap: + """Represents a memory-mapped region in a process's virtual address space.""" + start: int end: int filesize: int @@ -51,22 +22,28 @@ class VirtualMap: path: Optional[Path] def contains(self, addr: int) -> bool: + """Check if the given address is within this memory map.""" return self.start <= addr < self.end def is_executable(self) -> bool: + """Check if this memory region is executable.""" return "x" in self.flags def is_readable(self) -> bool: + """Check if this memory region is readable.""" return "r" in self.flags def is_writable(self) -> bool: + """Check if this memory region is writable.""" return "w" in self.flags def is_private(self) -> bool: + """Check if this memory region is private (copy-on-write).""" return "p" in self.flags @property def size(self) -> int: + """Return the size of this memory region.""" return self.end - self.start def __repr__(self) -> str: @@ -84,244 +61,18 @@ def __repr__(self) -> str: @dataclasses.dataclass class MemoryRange: + """Represents a range of memory addresses.""" + min_addr: int max_addr: int @dataclasses.dataclass class MemoryMapInformation: + """Container for memory map information needed for process analysis.""" + memory: MemoryRange heap: 
Optional[VirtualMap] bss: Optional[VirtualMap] python: VirtualMap libpython: Optional[VirtualMap] - - -def _read_maps(pid: int) -> List[str]: - try: - with open(f"/proc/{pid}/maps") as maps: - return maps.readlines() - except FileNotFoundError: - raise ProcessNotFound(f"No such process id: {pid}") from None - - -def generate_maps_for_process(pid: int) -> Iterable[VirtualMap]: - proc_maps_lines = _read_maps(pid) - for index, line in enumerate(proc_maps_lines): - line = line.rstrip("\n") - match = MAPS_REGEXP.match(line) - if not match: - LOGGER.debug("Line %r cannot be recognized!", line) - continue - - path = match.group("pathname") - yield VirtualMap( - start=int(match.group("start"), 16), - end=int(match.group("end"), 16), - filesize=int(match.group("end"), 16) - int(match.group("start"), 16), - offset=int(match.group("offset"), 16), - device=match.group("dev"), - flags=match.group("permissions"), - inode=int(match.group("inode")), - path=Path(path) if path else None, - ) - - -def generate_maps_from_core_data( - mapped_files: RawCoreMapList, memory_maps: RawCoreMapList -) -> Iterable[VirtualMap]: - memory_map_ranges = {(map["start"], map["end"]) for map in memory_maps} - missing_mapped_files = [ - map - for map in mapped_files - if (map["start"], map["end"]) not in memory_map_ranges - ] - - all_maps: RawCoreMapList = sorted( - memory_maps + missing_mapped_files, key=lambda map: map["start"] - ) - - # Some paths in the mapped files can be absolute, but we need to work with the canonical - # paths that the linker reported, so we need to "unresolve" those path back to whatever - # the memory math paths are so we can properly group then together. For example, the map - # for the interpreter may be "/usr/bin/python" in the mapped files and "/venv/bin/python" - # in the memory maps. 
- missing_map_paths = { - Path(map["path"]) for map in missing_mapped_files if map is not None - } - file_maps = {} - for map in memory_maps: - if not map["path"]: - continue - the_path = Path(map["path"]) - resolved_path = the_path.resolve() - if resolved_path in missing_map_paths: - file_maps[resolved_path] = the_path - - for data_elem in all_maps: - path = Path(data_elem["path"]) if data_elem["path"] else None - if path is not None: - path = file_maps.get(path, path) - - yield VirtualMap( - start=data_elem["start"], - end=data_elem["end"], - filesize=data_elem["filesize"], - offset=data_elem["offset"], - device=data_elem["device"], - flags=data_elem["flags"], - inode=data_elem["inode"], - path=path, - ) - - -def parse_maps_file(pid: int, all_maps: Iterable[VirtualMap]) -> MemoryMapInformation: - binary_name = Path(os.readlink(f"/proc/{pid}/exe")) - return parse_maps_file_for_binary(binary_name, all_maps) - - -def _get_base_map(binary_maps: List[VirtualMap]) -> VirtualMap: - maybe_map = next( - (map for map in binary_maps if map.path is not None), - None, - ) - if maybe_map is not None: - return maybe_map - first_map, *_ = binary_maps - return first_map - - -def _get_bss(elf_maps: List[VirtualMap], load_point: int) -> Optional[VirtualMap]: - binary_map = _get_base_map(elf_maps) - if not binary_map or not binary_map.path: - return None - try: - from ._pystack import get_bss_info - except ImportError: # pragma: no cover - return None - bss_info = get_bss_info(binary_map.path) - if not bss_info: - return None - start = load_point + bss_info["corrected_addr"] - LOGGER.info( - "Determined exact addr of .bss section: %s (%s + %s)", - hex(start), - hex(load_point), - hex(bss_info["corrected_addr"]), - ) - offset = 0 - - # Calculate the offset based on the mapped files. The offset in core files - # is only present in the core (and not in the original ELF) so this - # operation allows us to correlate the bss section with some memory location - # within the core file. 
- first_matching_map = next((map for map in elf_maps if map.contains(start)), None) - if first_matching_map is None: - return None - - offset = first_matching_map.offset + (start - first_matching_map.start) - - bss = VirtualMap( - start=start, - end=start + bss_info["size"], - filesize=bss_info["size"], - offset=offset, - device="", - flags="", - inode=0, - path=None, - ) - return bss - - -def parse_maps_file_for_binary( - binary_name: Path, - all_maps_iter: Iterable[VirtualMap], - load_point_by_module: Optional[Dict[str, int]] = None, -) -> MemoryMapInformation: - min_addr = float("inf") - max_addr = 0 - maps_by_library: Dict[str, List[VirtualMap]] = collections.defaultdict(list) - current_lib = "" - all_maps = tuple(all_maps_iter) - - if load_point_by_module is None: - load_point_by_module = collections.defaultdict(lambda: 2**64) - for memory_range in all_maps: - if memory_range.path is not None: - load_point_by_module[memory_range.path.name] = min( - memory_range.start, - load_point_by_module[memory_range.path.name], - ) - - for memory_range in all_maps: - current_lib = ( - memory_range.path.name if memory_range.path is not None else current_lib - ) - maps_by_library[current_lib].append(memory_range) - - if memory_range.path is None or not memory_range.path.name.startswith("[v"): - min_addr = min(min_addr, memory_range.start) - max_addr = max(max_addr, memory_range.end) - maps_by_library = dict(maps_by_library) - - python = libpython = bss = heap = None - try: - binary_maps = maps_by_library[binary_name.name] - python = _get_base_map(binary_maps) - except KeyError: - LOGGER.debug("Unable to find maps for %r in %r", binary_name, maps_by_library) - available_maps = { - str(map.path) - for map in all_maps - if map.path is not None and ".so" not in map.path.name - } - LOGGER.debug("Available executable maps: %s", ", ".join(available_maps)) - if available_maps: - maps_txt = ", ".join(available_maps) - msg = f"These are the available executable memory maps: 
{maps_txt}" - else: - msg = "There are no available executable maps with known paths." - raise MissingExecutableMaps( - f"Unable to find maps for the executable {binary_name}. " + msg - ) - LOGGER.info("python binary first map found: %r", python) - - libpython_binaries = [lib for lib in maps_by_library if "libpython" in lib] - if len(libpython_binaries) > 1: - raise PystackError( - f"Unexpectedly found multiple libpython in process: {libpython_binaries}" - ) - elif len(libpython_binaries) == 1: - libpython_name = libpython_binaries[0] - libpython_maps = maps_by_library[libpython_name] - load_point = load_point_by_module[libpython_name] - elf_maps = libpython_maps - libpython = _get_base_map(libpython_maps) - LOGGER.info("%r first map found: %r", libpython_name, libpython) - else: - LOGGER.info("Process does not have a libpython.so, reading from binary") - elf_maps = binary_maps - libpython = None - load_point = load_point_by_module[binary_name.name] - - heap_maps = maps_by_library.get("[heap]") - if heap_maps is not None: - *_, heap = [m for m in heap_maps if getattr(m.path, "name", None) == "[heap]"] - LOGGER.info("Heap map found: %r", heap) - - bss = _get_bss(elf_maps, load_point) - if bss is None: - bss = ( - next( - (map for map in elf_maps if map.path is None and map.is_readable()), - None, - ) - if elf_maps - else None - ) - if bss: - LOGGER.info("bss map found: %r", bss) - - memory = MemoryRange(min_addr=int(min_addr), max_addr=int(max_addr)) - return MemoryMapInformation(memory, heap, bss, python, libpython) diff --git a/src/pystack/process.py b/src/pystack/process.py index c44407f9..50c13df2 100644 --- a/src/pystack/process.py +++ b/src/pystack/process.py @@ -1,129 +1,15 @@ +"""Process utility functions. + +This module provides utility functions for checking file types +and decompressing gzip files. 
+""" import gzip -import logging import pathlib -import re -import subprocess import tempfile -from typing import Optional -from typing import Tuple - -from .errors import InvalidPythonProcess -from .maps import MemoryMapInformation -from .maps import VirtualMap - -VERSION_REGEXP = re.compile(r"Python (?P\d+)\.(?P\d+).*", re.IGNORECASE) - -BINARY_REGEXP = re.compile(r"python(?P\d+)\.(?P\d+).*", re.IGNORECASE) - -LIBPYTHON_REGEXP = re.compile( - r".*libpython(?P\d+)\.(?P\d+).*", re.IGNORECASE -) - -# Strings like "3.8.10 (default, May 26 2023, 14:05:08)" -# or "2.7.18rc1 (v2.7.18rc1:8d21aa21f2, Apr 20 2020, 13:19:08)" -# or "3.13.0+ experimental free-threading build (Python)" -BSS_VERSION_REGEXP = re.compile( - rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?" - rb"(?: (?:experimental )?free-threading build)? (\(.{1,64}\))" -) - -LOGGER = logging.getLogger(__file__) - - -def scan_process_bss_for_python_version( - pid: int, bss: VirtualMap -) -> Optional[Tuple[int, int]]: - # Lazy import _pystack to overcome a circular-import - # (we really don't want a new extension just for this) :( - try: - from pystack._pystack import copy_memory_from_address - except ImportError: # pragma: no cover - return None - memory = copy_memory_from_address(pid, bss.start, bss.size) - match = BSS_VERSION_REGEXP.findall(memory) - if not match: - return None - ((_, major, minor, patch, *_),) = match - return int(major), int(minor) - - -def scan_core_bss_for_python_version( - corefile: pathlib.Path, bss: VirtualMap -) -> Optional[Tuple[int, int]]: - with open(corefile, "rb") as the_corefile: - the_corefile.seek(bss.offset) - data = the_corefile.read(bss.size) - match = next(BSS_VERSION_REGEXP.finditer(data), None) - if not match: - return None - _, major, minor, patch, *_ = match.groups() - return int(major), int(minor) - - -def _get_python_version_from_map_information( - mapinfo: MemoryMapInformation, -) -> Tuple[int, int]: - match = None - assert mapinfo.python.path is not None - if 
mapinfo.libpython: - assert mapinfo.libpython.path is not None - LOGGER.info( - "Trying to extract version from filename: %s", mapinfo.libpython.path.name - ) - match = LIBPYTHON_REGEXP.match(mapinfo.libpython.path.name) - else: - LOGGER.info( - "Trying to extract version from filename: %s", mapinfo.python.path.name - ) - match = BINARY_REGEXP.match(mapinfo.python.path.name) - if match is None: - LOGGER.info( - "Could not find version by looking at library or binary path: " - "Trying to get it from running python --version" - ) - output = subprocess.check_output( - [mapinfo.python.path, "--version"], text=True, stderr=subprocess.STDOUT - ) - match = VERSION_REGEXP.match(output) - if not match: - raise InvalidPythonProcess( - f"Could not determine python version from {mapinfo.python.path}" - ) - major = match.group("major") - minor = match.group("minor") - LOGGER.info("Python version determined: %s.%s", major, minor) - return int(major), int(minor) - - -def get_python_version_for_process( - pid: int, mapinfo: MemoryMapInformation -) -> Tuple[int, int]: - if mapinfo.bss is not None: - version_from_bss = scan_process_bss_for_python_version(pid, mapinfo.bss) - if version_from_bss is not None: - LOGGER.info( - "Version found by scanning the bss section: %d.%d", *version_from_bss - ) - return version_from_bss - - return _get_python_version_from_map_information(mapinfo) - - -def get_python_version_for_core( - corefile: pathlib.Path, executable: pathlib.Path, mapinfo: MemoryMapInformation -) -> Tuple[int, int]: - if mapinfo.bss is not None: - version_from_bss = scan_core_bss_for_python_version(corefile, mapinfo.bss) - if version_from_bss is not None: - LOGGER.info( - "Version found by scanning the bss section: %d.%d", *version_from_bss - ) - return version_from_bss - return _get_python_version_from_map_information(mapinfo) def is_elf(filename: pathlib.Path) -> bool: - "Return True if the given file is an ELF file" + """Return True if the given file is an ELF file.""" try: 
elf_header = b"\x7fELF" with open(filename, "br") as thefile: @@ -132,23 +18,14 @@ def is_elf(filename: pathlib.Path) -> bool: return False -def get_thread_name(pid: int, tid: int) -> Optional[str]: - try: - with open(f"/proc/{pid}/task/{tid}/comm") as comm: - return comm.read().strip() - except OSError: - return None - - def is_gzip(filename: pathlib.Path) -> bool: - """ - Checks if the given file is a Gzip file based on the header. + """Check if the given file is a Gzip file based on the header. Args: - filename (pathlib.Path): The path to the file to be checked. + filename: The path to the file to be checked. Returns: - bool: True if the file starts with the Gzip header, False otherwise. + True if the file starts with the Gzip header, False otherwise. """ gzip_header = b"\x1f\x8b" with open(filename, "rb") as thefile: @@ -158,7 +35,7 @@ def is_gzip(filename: pathlib.Path) -> bool: def decompress_gzip( filename: pathlib.Path, chunk_size: int = 4 * 1024 * 1024 ) -> pathlib.Path: - """Decompresses a Gzip file and writes the contents to a temporary file. + """Decompress a Gzip file and write the contents to a temporary file. Args: filename: The path to the gzip file to decompress. 
diff --git a/tests/integration/test_gather_stacks.py b/tests/integration/test_gather_stacks.py index 693ab11b..f3a3a8a0 100644 --- a/tests/integration/test_gather_stacks.py +++ b/tests/integration/test_gather_stacks.py @@ -2,17 +2,9 @@ import subprocess import sys from pathlib import Path -from unittest.mock import mock_open -from unittest.mock import patch - -import pytest from pystack.engine import NativeReportingMode -from pystack.engine import StackMethod from pystack.engine import get_process_threads -from pystack.errors import NotEnoughInformation -from pystack.maps import MAPS_REGEXP -from pystack.process import get_thread_name from pystack.types import LocationInfo from pystack.types import NativeFrame from pystack.types import frame_type @@ -313,150 +305,6 @@ def test_multiple_thread_stack_native(python, method, blocking, tmpdir): assert any(frame.path and "?" not in frame.path for frame in eval_frames) -def test_gather_stack_with_heap_fails_if_no_heap(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if match.group("pathname") and "[heap]" in match.group("pathname"): - line = line.replace("[heap]", "[mysterious_segment]") - the_data.append(line) - data = "".join(the_data) - with patch("builtins.open", mock_open(read_data=data)): - # THEN - - with pytest.raises(NotEnoughInformation): - list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.HEAP - ) - ) - - -def test_gather_stack_with_bss_fails_if_no_bss(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if 
not match.group("pathname"): - line = line.replace("\n", "[mysterious_segment]\n") - the_data.append(line) - - data = "".join(the_data) - - with patch("builtins.open", mock_open(read_data=data)), patch( - "pystack.maps._get_bss", return_value=None - ): - # THEN - - with pytest.raises(NotEnoughInformation): - list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.BSS - ) - ) - - -def test_gather_stack_auto_works_if_no_bss(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if not match.group("pathname"): - line = line.replace("\n", "[mysterious_segment]\n") - the_data.append(line) - data = "".join(the_data) - with patch("builtins.open", mock_open(read_data=data)), patch( - "pystack.maps._get_bss", return_value=None - ): - threads = list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.AUTO - ) - ) - - # THEN - - assert len(threads) == 1 - (thread,) = threads - - frames = list(thread.frames) - assert (len(frames)) == 4 - - filenames = {frame.code.filename for frame in frames} - assert filenames == {str(TEST_SINGLE_THREAD_FILE)} - - functions = [frame.code.scope for frame in frames] - assert functions == ["<module>", "first_func", "second_func", "third_func"] - - *line_numbers, last_line = [frame.code.location.lineno for frame in frames] - assert line_numbers == [20, 6, 10] - assert last_line in {16, 17} - - assert not thread.native_frames - - -def test_gather_stack_auto_works_if_no_heap(tmpdir): - # GIVEN / WHEN - - with spawn_child_process( - sys.executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - the_data = [] - with open(f"/proc/{child_process.pid}/maps") as f: - for line in f.readlines(): - match = MAPS_REGEXP.match(line) - assert match is not None - if
match.group("pathname") and "[heap]" in match.group("pathname"): - line = line.replace("[heap]", "[mysterious_segment]") - the_data.append(line) - data = "".join(the_data) - with patch("builtins.open", mock_open(read_data=data)): - threads = list( - get_process_threads( - child_process.pid, stop_process=True, method=StackMethod.AUTO - ) - ) - - # THEN - - assert len(threads) == 1 - (thread,) = threads - - frames = list(thread.frames) - assert (len(frames)) == 4 - - filenames = {frame.code.filename for frame in frames} - assert filenames == {str(TEST_SINGLE_THREAD_FILE)} - - functions = [frame.code.scope for frame in frames] - assert functions == ["<module>", "first_func", "second_func", "third_func"] - - *line_numbers, last_line = [frame.code.location.lineno for frame in frames] - assert line_numbers == [20, 6, 10] - assert last_line in {16, 17} - - assert not thread.native_frames - - @ALL_PYTHONS def test_thread_registered_with_python_but_with_no_python_calls(python, tmpdir): # GIVEN @@ -584,14 +432,6 @@ def test_get_thread_name(tmpdir): assert "thread_foo" in {thread.name for thread in threads} -def test_get_thread_name_oserror(): - # WHEN - thread_name = get_thread_name(pid=0, tid=0) - - # THEN - assert thread_name is None - - @ALL_PYTHONS def test_inlined_python_calls(python, tmpdir): # GIVEN diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index a6518615..190c4443 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -1,72 +1,20 @@ import sys +import threading +from concurrent.futures import ThreadPoolExecutor from pathlib import Path import pytest from pystack._pystack import ProcessManager -from pystack.engine import CoreFileAnalyzer from pystack.engine import get_process_threads -from pystack.errors import EngineError -from pystack.maps import generate_maps_for_process -from pystack.maps import parse_maps_file -from pystack.maps import parse_maps_file_for_binary from pystack.process import is_elf
-from pystack.process import scan_core_bss_for_python_version -from pystack.process import scan_process_bss_for_python_version from tests.utils import ALL_PYTHONS -from tests.utils import generate_core_file from tests.utils import spawn_child_process TEST_SINGLE_THREAD_FILE = Path(__file__).parent / "single_thread_program.py" TEST_SHUTDOWN_FILE = Path(__file__).parent / "shutdown_program.py" -@ALL_PYTHONS -def test_remote_version_detection_using_bss_section(python, tmpdir): - # GIVEN - - (expected_major, expected_minor), python_executable = python - - # WHEN - - with spawn_child_process( - python_executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as child_process: - all_maps = generate_maps_for_process(child_process.pid) - maps = parse_maps_file(child_process.pid, all_maps) - major, minor = scan_process_bss_for_python_version(child_process.pid, maps.bss) - - # THEN - - assert major == expected_major - assert minor == expected_minor - - -@ALL_PYTHONS -def test_core_version_detection_using_bss_section(python, tmpdir): - # GIVEN - - (expected_major, expected_minor), python_executable = python - - # WHEN - - with generate_core_file( - python_executable, TEST_SINGLE_THREAD_FILE, tmpdir - ) as corefile: - core_map_analyzer = CoreFileAnalyzer(str(corefile), str(python_executable)) - virtual_maps = tuple(core_map_analyzer.extract_maps()) - load_point_by_module = core_map_analyzer.extract_module_load_points() - maps = parse_maps_file_for_binary( - python_executable, virtual_maps, load_point_by_module - ) - major, minor = scan_core_bss_for_python_version(corefile, maps.bss) - - # THEN - - assert major == expected_major - assert minor == expected_minor - - @ALL_PYTHONS def test_detection_of_interpreter_shutdown(python, tmpdir): # GIVEN @@ -120,11 +68,29 @@ def test_reattaching_to_already_traced_process(python, tmpdir): pid = child_process.pid # WHEN / THEN - with pytest.raises(EngineError, match="Operation not permitted"): - it1 = iter(get_process_threads(pid, 
stop_process=True)) - it2 = iter(get_process_threads(pid, stop_process=True)) - next(it1) - next(it2) + # Use threading to create overlapping attachment attempts. + # The first thread holds the ptrace attachment while the second tries to attach. + barrier = threading.Barrier(2) + results = [] + errors = [] + + def attach_thread(): + try: + barrier.wait(timeout=5) # Synchronize start + threads = list(get_process_threads(pid, stop_process=True)) + results.append(len(threads)) + except Exception as e: + errors.append(str(e)) + + with ThreadPoolExecutor(max_workers=2) as executor: + f1 = executor.submit(attach_thread) + f2 = executor.submit(attach_thread) + f1.result(timeout=10) + f2.result(timeout=10) + + # One should succeed, one should fail with "Operation not permitted" + assert len(results) + len(errors) == 2 + assert any("Operation not permitted" in err for err in errors) @pytest.mark.parametrize( diff --git a/tests/unit/test_maps.py b/tests/unit/test_maps.py deleted file mode 100644 index 4f829749..00000000 --- a/tests/unit/test_maps.py +++ /dev/null @@ -1,1267 +0,0 @@ -from pathlib import Path -from unittest.mock import mock_open -from unittest.mock import patch - -import pytest - -from pystack.errors import MissingExecutableMaps -from pystack.errors import ProcessNotFound -from pystack.errors import PystackError -from pystack.maps import VirtualMap -from pystack.maps import _get_base_map -from pystack.maps import _get_bss -from pystack.maps import generate_maps_for_process -from pystack.maps import parse_maps_file_for_binary - - -def test_virtual_map(): - # GIVEN - - map = VirtualMap( - start=0, - end=10, - offset=1234, - device="device", - flags="xrwp", - inode=42, - path=None, - filesize=10, - ) - - # WHEN / THEN - - assert map.contains(5) - assert not map.contains(15) - assert map.is_private() - assert map.is_executable() - assert map.is_readable() - assert map.is_writable() - - -def test_simple_maps_no_such_pid(): - # GIVEN - - with patch("builtins.open", 
side_effect=FileNotFoundError()): - # WHEN / THEN - with pytest.raises(ProcessNotFound): - list(generate_maps_for_process(1)) - - -def test_simple_maps(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 08:12 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_maps_with_long_device_numbers(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 0123:4567 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="0123:4567", - flags="r--p", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_anonymous_maps(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 08:12 8398159 - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ), - ] - - -def test_map_permissions(): - # GIVEN - - map_text = """ -7f1ac1e2b000-7f1ac1e50000 r--- 00000000 08:12 8398159 /usr/lib/libc-2.31.so -7f1ac1e2b000-7f1ac1e50000 rw-- 00000000 08:12 8398159 /usr/lib/libc-2.31.so -7f1ac1e2b000-7f1ac1e50000 rwx- 00000000 08:12 8398159 /usr/lib/libc-2.31.so -7f1ac1e2b000-7f1ac1e50000 rwxp 00000000 08:12 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", 
mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r---", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="rw--", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="rwx-", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="rwxp", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_unexpected_line_is_ignored(): - # GIVEN - - map_text = """ -I am an unexpected line -7f1ac1e2b000-7f1ac1e50000 r--p 00000000 08:12 8398159 /usr/lib/libc-2.31.so - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=151552, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - -def test_special_maps(): - # GIVEN - - map_text = """ -555f1ab1c000-555f1ab3d000 rw-p 00000000 00:00 0 [heap] -7ffdf8102000-7ffdf8124000 rw-p 00000000 00:00 0 [stack] -7ffdf8152000-7ffdf8155000 r--p 00000000 00:00 0 [vvar] -7ffdf8155000-7ffdf8156000 r-xp 00000000 00:00 0 [vdso] -ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall] - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - # THEN - - assert maps == [ - VirtualMap( - start=93866958110720, - end=93866958245888, - filesize=135168, - offset=0, - 
device="00:00", - flags="rw-p", - inode=0, - path=Path("[heap]"), - ), - VirtualMap( - start=140728765259776, - end=140728765399040, - filesize=139264, - offset=0, - device="00:00", - flags="rw-p", - inode=0, - path=Path("[stack]"), - ), - VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=12288, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("[vvar]"), - ), - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("[vdso]"), - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("[vsyscall]"), - ), - ] - - -def test_maps_for_binary_only_python_exec(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_with_heap(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - heap = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=12288, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("[heap]"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - 
inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - heap, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert mapinfo.bss is None - assert mapinfo.heap == heap - - -def test_maps_for_binary_with_libpython(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_executable_with_bss(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - maps = [ - python, - bss, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert 
mapinfo.bss == bss - assert mapinfo.heap is None - - -def test_maps_for_binary_libpython_with_bss(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - - maps = [ - python, - bss, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - libpython_bss, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss == libpython_bss - assert mapinfo.heap is None - - -def test_maps_for_binary_libpython_without_bss(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - maps = [ - python, - bss, - VirtualMap( - 
start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_libpython_with_bss_with_non_readable_segment(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - bss = VirtualMap( - start=139752898736128, - end=139752898887680, - filesize=4096, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ) - - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - - maps = [ - python, - bss, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - libpython, - VirtualMap( - start=1844674407369906, - end=18446744073699069, - filesize=4096, - offset=0, - device="00:00", - flags="---p", - inode=0, - path=None, - ), - libpython_bss, - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython == libpython - assert mapinfo.bss == libpython_bss - assert mapinfo.heap is None - - -def test_maps_for_binary_range(): - # GIVEN - - maps = [ - VirtualMap( - start=1, - end=2, - filesize=1, - 
offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ), - VirtualMap( - start=2, - end=3, - filesize=1, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=None, - ), - VirtualMap( - start=5, - end=6, - filesize=1, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ), - VirtualMap( - start=8, - end=9, - filesize=1, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=None, - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.memory.min_addr == 1 - assert mapinfo.memory.max_addr == 9 - - -def test_maps_for_binary_range_vmaps_are_ignored(): - # GIVEN - - maps = [ - VirtualMap( - start=1, - end=2, - filesize=1, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ), - VirtualMap( - start=2000, - end=3000, - filesize=1000, - offset=0, - device="08:12", - flags="r--p", - inode=8398159, - path=Path("[vsso]"), - ), - VirtualMap( - start=5, - end=6, - filesize=1, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("[vsyscall]"), - ), - VirtualMap( - start=8, - end=9, - filesize=1, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("[vvar]"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.memory.min_addr == 1 - assert mapinfo.memory.max_addr == 2 - - -def test_maps_for_binary_no_binary_map(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN / THEN - - with 
pytest.raises(MissingExecutableMaps): - parse_maps_file_for_binary(Path("another_executable"), maps) - - -def test_maps_for_binary_no_executable_segment(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - mapinfo = parse_maps_file_for_binary(Path("the_executable"), maps) - - # THEN - - assert mapinfo.python == python - assert mapinfo.libpython is None - assert mapinfo.bss is None - assert mapinfo.heap is None - - -def test_maps_for_binary_multiple_libpythons(): - # GIVEN - - maps = [ - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("the_executable"), - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libpython3.8.so"), - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libpython2.7.so"), - ), - ] - - # WHEN / THEN - - with pytest.raises(PystackError): - parse_maps_file_for_binary(Path("the_executable"), maps) - - -def test_maps_for_binary_invalid_executable(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=Path("the_executable"), - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - 
- # WHEN - - with pytest.raises(MissingExecutableMaps, match="the_executable"): - parse_maps_file_for_binary(Path("other_executable"), maps) - - -def test_maps_for_binary_invalid_executable_and_no_available_maps(): - # GIVEN - - python = VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - - maps = [ - python, - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - - with pytest.raises( - MissingExecutableMaps, match="There are no available executable maps" - ): - parse_maps_file_for_binary(Path("other_executable"), maps) - - -def test_maps_with_scattered_segments(): - map_text = """ -00400000-00401000 r-xp 00000000 fd:00 67488961 /bin/python3.9-dbg -00600000-00601000 r--p 00000000 fd:00 67488961 /bin/python3.9-dbg -00601000-00602000 rw-p 00001000 fd:00 67488961 /bin/python3.9-dbg -0067b000-00a58000 rw-p 00000000 00:00 0 [heap] -7f7b38000000-7f7b38028000 rw-p 00000000 00:00 0 -7f7b38028000-7f7b3c000000 ---p 00000000 00:00 0 -7f7b40000000-7f7b40021000 rw-p 00000000 00:00 0 -7f7b40021000-7f7b44000000 ---p 00000000 00:00 0 -7f7b44ec0000-7f7b44f40000 rw-p 00000000 00:00 0 -f7b45a61000-7f7b45d93000 rw-p 00000000 00:00 0 -7f7b46014000-7f7b46484000 r--p 0050b000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b46484000-7f7b46485000 ---p 00000000 00:00 0 -7f7b46485000-7f7b46cda000 rw-p 00000000 00:00 0 -7f7b46cda000-7f7b46d16000 r--p 00a3d000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b46d16000-7f7b46d6f000 rw-p 00000000 00:00 0 -7f7b46d6f000-7f7b46d92000 r--p 00001000 fd:00 67488961 /bin/python3.9-dbg -7f7b46d92000-7f7b46d93000 ---p 00000000 00:00 0 -7f7b46d93000-7f7b475d3000 rw-p 00000000 00:00 0 -7f7b498c1000-7f7b49928000 r-xp 00000000 fd:00 7023 /lib64/libssl.so.1.0.0 -7f7b49928000-7f7b49b28000 ---p 00067000 fd:00 7023 
/lib64/libssl.so.1.0.0 -f7b4c632000-7f7b4c6f3000 rw-p 00000000 00:00 0 -7f7b4c6f3000-7f7b4c711000 rw-p 00000000 00:00 0 -7f7b4c711000-7f7b4c712000 r--p 0002a000 fd:00 67488961 /bin/python3.9-dbg -7f7b4c712000-7f7b4c897000 rw-p 00000000 00:00 0 -7f7b5a356000-7f7b5a35d000 r--s 00000000 fd:00 201509519 /usr/lib64/gconv/gconv-modules.cache -7f7b5a35d000-7f7b5a827000 r-xp 00000000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5a827000-7f7b5aa27000 ---p 004ca000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5aa27000-7f7b5aa2c000 r--p 004ca000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5aa2c000-7f7b5aa67000 rw-p 004cf000 fd:00 1059871 /lib64/libpython3.9d.so.1.0 -7f7b5aa67000-7f7b5aa8b000 rw-p 00000000 00:00 0 -7fff26f8e000-7fff27020000 rw-p 00000000 00:00 0 [stack] -7fff27102000-7fff27106000 r--p 00000000 00:00 0 [vvar] -7fff27106000-7fff27108000 r-xp 00000000 00:00 0 [vdso] -ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] - """ - - # WHEN - - with patch("builtins.open", mock_open(read_data=map_text)): - maps = list(generate_maps_for_process(1)) - - mapinfo = parse_maps_file_for_binary(Path("/bin/python3.9-dbg"), maps) - - # THEN - - assert mapinfo.python == VirtualMap( - start=0x400000, - end=0x401000, - filesize=4096, - offset=0, - device="fd:00", - flags="r-xp", - inode=67488961, - path=Path("/bin/python3.9-dbg"), - ) - assert mapinfo.libpython == VirtualMap( - start=0x7F7B46014000, - end=0x7F7B46484000, - filesize=4653056, - offset=5287936, - device="fd:00", - flags="r--p", - inode=1059871, - path=Path("/lib64/libpython3.9d.so.1.0"), - ) - assert mapinfo.bss == VirtualMap( - start=140167436849152, - end=140167445585920, - filesize=8736768, - offset=0, - device="00:00", - flags="rw-p", - inode=0, - path=None, - ) - assert mapinfo.heap == VirtualMap( - start=0x0067B000, - end=0x00A58000, - filesize=4050944, - offset=0, - device="00:00", - flags="rw-p", - inode=0, - path=Path("[heap]"), - ) - - -def test_get_base_map_path_existing(): - # GIVEN - 
maps = [ - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=Path("/usr/lib/libc-2.31.so"), - ), - ] - - # WHEN - base_map = _get_base_map(maps) - - # THEN - assert base_map == maps[1] - - -def test_get_base_map_path_not_existing(): - # GIVEN - maps = [ - VirtualMap( - start=140728765599744, - end=140728765603840, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ), - VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=None, - ), - ] - - # WHEN - base_map = _get_base_map(maps) - - # THEN - assert base_map == maps[0] - - -def test_get_bss_base_map_no_path(): - # GIVEN - map_no_path = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="--xp", - inode=0, - path=None, - ) - - # WHEN - with patch("pystack.maps._get_base_map", return_value=map_no_path): - bss = _get_bss("elf_maps", "load_point") - - # THEN - assert bss is None - - -def test_get_bss_no_matching_map(): - # GIVEN - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - maps = [libpython, libpython_bss] - - # WHEN - with patch("pystack._pystack.get_bss_info") as mock_get_bss_info: - mock_get_bss_info.return_value = {"corrected_addr": 100000000} - bss = _get_bss(maps, libpython.start) - - # THEN - assert bss is None - - -def 
test_get_bss_found_matching_map(): - # GIVEN - libpython = VirtualMap( - start=140728765587456, - end=140728765599744, - filesize=4096, - offset=0, - device="00:00", - flags="r--p", - inode=0, - path=Path("/some/path/to/libpython.so"), - ) - - libpython_bss = VirtualMap( - start=18446744073699065856, - end=18446744073699069952, - filesize=4096, - offset=0, - device="00:00", - flags="r-xp", - inode=0, - path=None, - ) - maps = [libpython, libpython_bss] - - # WHEN - with patch("pystack._pystack.get_bss_info") as mock_get_bss_info: - mock_get_bss_info.return_value = { - "corrected_addr": libpython_bss.start - libpython.start, - "size": libpython_bss.filesize, - } - bss = _get_bss(maps, libpython.start) - - # THEN - assert bss == VirtualMap( - start=libpython_bss.start, - end=libpython_bss.end, - filesize=libpython_bss.filesize, - offset=libpython_bss.offset, - device="", - flags="", - inode=0, - path=None, - ) diff --git a/tests/unit/test_process.py b/tests/unit/test_process.py deleted file mode 100644 index f33ae70e..00000000 --- a/tests/unit/test_process.py +++ /dev/null @@ -1,480 +0,0 @@ -from unittest.mock import Mock -from unittest.mock import mock_open -from unittest.mock import patch - -import pytest - -from pystack.errors import InvalidPythonProcess -from pystack.maps import VirtualMap -from pystack.process import BINARY_REGEXP -from pystack.process import LIBPYTHON_REGEXP -from pystack.process import VERSION_REGEXP -from pystack.process import get_python_version_for_core -from pystack.process import get_python_version_for_process -from pystack.process import scan_core_bss_for_python_version -from pystack.process import scan_process_bss_for_python_version - - -@pytest.mark.parametrize( - "text, version", - [ - ("libpython3.8.so", (3, 8)), - ("libpython3.5.12.so", (3, 5)), - ("libpython3.8m.so", (3, 8)), - ("libpython3.8d.so", (3, 8)), - ("libpython3.8dm.so", (3, 8)), - ("libpython2.7.so.1", (2, 7)), - ("libpython2.7.so.1.0", (2, 7)), - ("LIBPYTHON3.8.so", (3, 
8)), - ("LiBpYtHoN3.6.so", (3, 6)), - ], -) -def test_libpython_detection(text, version): - # GIVEN / WHEN - result = LIBPYTHON_REGEXP.match(text) - - # THEN - assert result - - major, minor = version - assert int(result.group("major")) == major - assert int(result.group("minor")) == minor - - -@pytest.mark.parametrize( - "text", ["libpython.so", "libpython.so.1.0", "libpythondm.so.1.0"] -) -def test_libpython_false_cases(text): - # GIVEN / WHEN - result = LIBPYTHON_REGEXP.match(text) - - # THEN - assert result is None - - -@pytest.mark.parametrize( - "text, version", - [ - ("python3.8", (3, 8)), - ("python3.5.1.2", (3, 5)), - ("python2.7.exe", (2, 7)), - ("Python3.6", (3, 6)), - ("PyThOn3.5", (3, 5)), - ], -) -def test_executable_detection(text, version): - # GIVEN / WHEN - result = BINARY_REGEXP.match(text) - - # THEN - assert result - - major, minor = version - assert int(result.group("major")) == major - assert int(result.group("minor")) == minor - - -@pytest.mark.parametrize("text", ["cat3.8", "python3", "python2"]) -def test_executable_false_cases(text): - # GIVEN / WHEN - result = BINARY_REGEXP.match(text) - - # THEN - assert result is None - - -@pytest.mark.parametrize( - "text, version", - [ - ("Python 3.8.2", (3, 8)), - ("Python 3.8.1rc2", (3, 8)), - ("Python 3.9.0b4", (3, 9)), - ("Python 2.7.16", (2, 7)), - ], -) -def test_version_detection(text, version): - # GIVEN / WHEN - result = VERSION_REGEXP.match(text) - - # THEN - assert result - - major, minor = version - assert int(result.group("major")) == major - assert int(result.group("minor")) == minor - - -def test_get_python_version_for_process_fallback_bss(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - 
scan_bss_mock.return_value = (3, 8) - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_process_fallback_libpython_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - libpython_regexp_mock.match.return_value = match - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_process_fallback_binary_regexp(): - # GIVEN - mapinfo = Mock() - mapinfo.libpython = None - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - binary_regexp_mock.match.return_value = match - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.match.assert_called_once() - 
subprocess_mock.assert_not_called() - - -def test_get_python_version_for_process_fallback_version_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "Python 3.8.3" - major, minor = get_python_version_for_process(0, mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.match.asser_not_called() - subprocess_mock.assert_called_once() - - -def test_get_python_version_for_process_fallback_failure(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_process_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ), patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "" - # THEN - with pytest.raises(InvalidPythonProcess): - get_python_version_for_process(0, mapinfo) - - -def test_get_python_version_for_core_fallback_bss(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = (3, 8) - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) 
- scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_no_bss(): - # GIVEN - mapinfo = Mock() - mapinfo.bss = None - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - match = Mock() - match.group.side_effect = [3, 8] - libpython_regexp_mock.match.return_value = match - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_not_called() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_libpython_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - libpython_regexp_mock.match.return_value = match - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.assert_not_called() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_binary_regexp(): - # GIVEN - mapinfo = Mock() - mapinfo.libpython = None - - # WHEN - with patch( - 
"pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - match = Mock() - match.group.side_effect = [3, 8] - binary_regexp_mock.match.return_value = match - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_not_called() - binary_regexp_mock.match.assert_called_once() - subprocess_mock.assert_not_called() - - -def test_get_python_version_for_core_fallback_version_regexp(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ) as binary_regexp_mock, patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "Python 3.8.3" - major, minor = get_python_version_for_core("corefile", "executable", mapinfo) - - # THEN - assert (major, minor) == (3, 8) - scan_bss_mock.assert_called_once() - libpython_regexp_mock.match.assert_called_once() - binary_regexp_mock.match.asser_not_called() - subprocess_mock.assert_called_once() - - -def test_get_python_version_for_core_fallback_falure(): - # GIVEN - mapinfo = Mock() - - # WHEN - with patch( - "pystack.process.scan_core_bss_for_python_version" - ) as scan_bss_mock, patch( - "pystack.process.LIBPYTHON_REGEXP" - ) as libpython_regexp_mock, patch( - "pystack.process.BINARY_REGEXP" - ), patch( - "subprocess.check_output" - ) as subprocess_mock: - scan_bss_mock.return_value = None - 
libpython_regexp_mock.match.return_value = None - subprocess_mock.return_value = "" - # THEN - with pytest.raises(InvalidPythonProcess): - get_python_version_for_core("corefile", "executable", mapinfo) - - -def test_scan_process_bss_for_python_version(): - # GIVEM - - memory = ( - b"garbagegarbagePython 3.8.3 (default, May 22 2020, 23:30:25)garbagegarbage" - ) - bss = Mock() - # WHEN - - with patch("pystack._pystack.copy_memory_from_address", return_value=memory): - major, minor = scan_process_bss_for_python_version(0, bss) - - # THEN - - assert major == 3 - assert minor == 8 - - -def test_scan_process_bss_for_python_version_failure(): - # GIVEM - - memory = b"garbagegarbagegarbagegarbage" - bss = Mock() - # WHEN - - with patch("pystack._pystack.copy_memory_from_address", return_value=memory): - result = scan_process_bss_for_python_version(0, bss) - - # THEN - - assert result is None - - -def test_scan_core_bss_for_python_version(): - # GIVEM - - memory = ( - b"garbagegarbagePython 3.8.3 (default, May 22 2020, 23:30:25)garbagegarbage" - ) - bss = VirtualMap( - start=0, - end=len(memory), - filesize=len(memory), - offset=0, - flags="", - inode=0, - device="", - path=None, - ) - # WHEN - - with patch("builtins.open", mock_open(read_data=memory)): - major, minor = scan_core_bss_for_python_version("corefile", bss) - - # THEN - - assert major == 3 - assert minor == 8 - - -def test_scan_core_bss_for_python_version_failure(): - # GIVEM - - memory = b"garbagegarbagegarbagegarbage" - bss = VirtualMap( - start=0, - end=len(memory), - filesize=len(memory), - offset=0, - flags="", - inode=0, - device="", - path=None, - ) - # WHEN - - with patch("builtins.open", mock_open(read_data=memory)): - result = scan_core_bss_for_python_version("corefile", bss) - - # THEN - - assert result is None