diff --git a/.github/CONTRIBUTING.rst b/.github/CONTRIBUTING.rst index 224061f2c5a350..94b67ce3dbe341 100644 --- a/.github/CONTRIBUTING.rst +++ b/.github/CONTRIBUTING.rst @@ -28,13 +28,12 @@ Please be aware that our workflow does deviate slightly from the typical GitHub project. Details on how to properly submit a pull request are covered in `Lifecycle of a Pull Request `_. We utilize various bots and status checks to help with this, so do follow the -comments they leave and their "Details" links, respectively. The key points of -our workflow that are not covered by a bot or status check are: +comments they leave and their "Details" links, respectively. -- All discussions that are not directly related to the code in the pull request - should happen on `GitHub Issues `_. -- Upon your first non-trivial pull request (which includes documentation changes), - feel free to add yourself to ``Misc/ACKS``. +The final key part of our workflow is that all discussions that are not +directly related to the code in the pull request should happen on +`GitHub Issues `__, generally in the +pull request's parent issue. Setting Expectations diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c8a3165d690364..7f3376f8ddb1e2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,6 +12,11 @@ updates: update-types: - "version-update:semver-minor" - "version-update:semver-patch" + cooldown: + # https://blog.yossarian.net/2025/11/21/We-should-all-be-using-dependency-cooldowns + # Cooldowns protect against supply chain attacks by avoiding the + # highest-risk window immediately after new releases. 
+ default-days: 14 - package-ecosystem: "pip" directory: "/Tools/" schedule: @@ -19,3 +24,5 @@ updates: labels: - "skip issue" - "skip news" + cooldown: + default-days: 14 diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index a309ef4e7f4485..91d76fd1b5f8c5 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -13,7 +13,7 @@ jobs: timeout-minutes: 60 env: WASMTIME_VERSION: 38.0.3 - WASI_SDK_VERSION: 25 + WASI_SDK_VERSION: 29 WASI_SDK_PATH: /opt/wasi-sdk CROSS_BUILD_PYTHON: cross-build/build CROSS_BUILD_WASI: cross-build/wasm32-wasip1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0311f052798ad..c5767ee841eb0d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,10 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.13.2 hooks: + - id: ruff-check + name: Run Ruff (lint) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple/ - id: ruff-check name: Run Ruff (lint) on Doc/ args: [--exit-non-zero-on-fix] @@ -30,6 +34,10 @@ repos: name: Run Ruff (lint) on Tools/wasm/ args: [--exit-non-zero-on-fix, --config=Tools/wasm/.ruff.toml] files: ^Tools/wasm/ + - id: ruff-format + name: Run Ruff (format) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple - id: ruff-format name: Run Ruff (format) on Doc/ args: [--check] diff --git a/Apple/.ruff.toml b/Apple/.ruff.toml new file mode 100644 index 00000000000000..4cdc39ebee4be9 --- /dev/null +++ b/Apple/.ruff.toml @@ -0,0 +1,22 @@ +extend = "../.ruff.toml" # Inherit the project-wide settings + +[format] +preview = true +docstring-code-format = true + +[lint] +select = [ + "C4", # flake8-comprehensions + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RUF100", # Ban unused `# noqa` 
comments + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] diff --git a/Apple/__main__.py b/Apple/__main__.py index e76fc351798707..256966e76c2c97 100644 --- a/Apple/__main__.py +++ b/Apple/__main__.py @@ -46,13 +46,12 @@ import sys import sysconfig import time -from collections.abc import Sequence +from collections.abc import Callable, Sequence from contextlib import contextmanager from datetime import datetime, timezone from os.path import basename, relpath from pathlib import Path from subprocess import CalledProcessError -from typing import Callable EnvironmentT = dict[str, str] ArgsT = Sequence[str | Path] @@ -140,17 +139,15 @@ def print_env(env: EnvironmentT) -> None: def apple_env(host: str) -> EnvironmentT: """Construct an Apple development environment for the given host.""" env = { - "PATH": ":".join( - [ - str(PYTHON_DIR / "Apple/iOS/Resources/bin"), - str(subdir(host) / "prefix"), - "/usr/bin", - "/bin", - "/usr/sbin", - "/sbin", - "/Library/Apple/usr/bin", - ] - ), + "PATH": ":".join([ + str(PYTHON_DIR / "Apple/iOS/Resources/bin"), + str(subdir(host) / "prefix"), + "/usr/bin", + "/bin", + "/usr/sbin", + "/sbin", + "/Library/Apple/usr/bin", + ]), } return env @@ -196,14 +193,10 @@ def clean(context: argparse.Namespace, target: str = "all") -> None: paths.append(target) if target in {"all", "hosts", "test"}: - paths.extend( - [ - path.name - for path in CROSS_BUILD_DIR.glob( - f"{context.platform}-testbed.*" - ) - ] - ) + paths.extend([ + path.name + for path in CROSS_BUILD_DIR.glob(f"{context.platform}-testbed.*") + ]) for path in paths: delete_path(path) @@ -352,18 +345,16 @@ def download(url: str, target_dir: Path) -> Path: out_path = target_path / basename(url) if not Path(out_path).is_file(): - run( - [ - "curl", - "-Lf", - "--retry", - "5", - "--retry-all-errors", - "-o", - out_path, - url, - ] - ) + run([ + "curl", + "-Lf", + "--retry", + "5", + "--retry-all-errors", + "-o", + out_path, + url, + ]) else: print(f"Using cached 
version of {basename(url)}") return out_path @@ -468,8 +459,7 @@ def package_version(prefix_path: Path) -> str: def lib_platform_files(dirname, names): - """A file filter that ignores platform-specific files in the lib directory. - """ + """A file filter that ignores platform-specific files in lib.""" path = Path(dirname) if ( path.parts[-3] == "lib" @@ -478,7 +468,7 @@ def lib_platform_files(dirname, names): ): return names elif path.parts[-2] == "lib" and path.parts[-1].startswith("python"): - ignored_names = set( + ignored_names = { name for name in names if ( @@ -486,7 +476,13 @@ def lib_platform_files(dirname, names): or name.startswith("_sysconfig_vars_") or name == "build-details.json" ) - ) + } + elif path.parts[-1] == "lib": + ignored_names = { + name + for name in names + if name.startswith("libpython") and name.endswith(".dylib") + } else: ignored_names = set() @@ -499,7 +495,9 @@ def lib_non_platform_files(dirname, names): """ path = Path(dirname) if path.parts[-2] == "lib" and path.parts[-1].startswith("python"): - return set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + return ( + set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + ) else: return set() @@ -514,7 +512,8 @@ def create_xcframework(platform: str) -> str: package_path.mkdir() except FileExistsError: raise RuntimeError( - f"{platform} XCframework already exists; do you need to run with --clean?" + f"{platform} XCframework already exists; do you need to run " + "with --clean?" 
) from None frameworks = [] @@ -607,7 +606,7 @@ def create_xcframework(platform: str) -> str: print(f" - {slice_name} binaries") shutil.copytree(first_path / "bin", slice_path / "bin") - # Copy the include path (this will be a symlink to the framework headers) + # Copy the include path (a symlink to the framework headers) print(f" - {slice_name} include files") shutil.copytree( first_path / "include", @@ -621,6 +620,12 @@ def create_xcframework(platform: str) -> str: slice_framework / "Headers/pyconfig.h", ) + print(f" - {slice_name} shared library") + # Create a symlink for the fat library + shared_lib = slice_path / f"lib/libpython{version_tag}.dylib" + shared_lib.parent.mkdir() + shared_lib.symlink_to("../Python.framework/Python") + print(f" - {slice_name} architecture-specific files") for host_triple, multiarch in slice_parts.items(): print(f" - {multiarch} standard library") @@ -632,6 +637,7 @@ def create_xcframework(platform: str) -> str: framework_path(host_triple, multiarch) / "lib", package_path / "Python.xcframework/lib", ignore=lib_platform_files, + symlinks=True, ) has_common_stdlib = True @@ -639,6 +645,7 @@ def create_xcframework(platform: str) -> str: framework_path(host_triple, multiarch) / "lib", slice_path / f"lib-{arch}", ignore=lib_non_platform_files, + symlinks=True, ) # Copy the host's pyconfig.h to an architecture-specific name. @@ -659,7 +666,8 @@ def create_xcframework(platform: str) -> str: # statically link those libraries into a Framework, you become # responsible for providing a privacy manifest for that framework. 
xcprivacy_file = { - "OpenSSL": subdir(host_triple) / "prefix/share/OpenSSL.xcprivacy" + "OpenSSL": subdir(host_triple) + / "prefix/share/OpenSSL.xcprivacy" } print(f" - {multiarch} xcprivacy files") for module, lib in [ @@ -669,7 +677,8 @@ def create_xcframework(platform: str) -> str: shutil.copy( xcprivacy_file[lib], slice_path - / f"lib-{arch}/python{version_tag}/lib-dynload/{module}.xcprivacy", + / f"lib-{arch}/python{version_tag}" + / f"lib-dynload/{module}.xcprivacy", ) print(" - build tools") @@ -692,18 +701,16 @@ def package(context: argparse.Namespace) -> None: # Clone testbed print() - run( - [ - sys.executable, - "Apple/testbed", - "clone", - "--platform", - context.platform, - "--framework", - CROSS_BUILD_DIR / context.platform / "Python.xcframework", - CROSS_BUILD_DIR / context.platform / "testbed", - ] - ) + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + CROSS_BUILD_DIR / context.platform / "Python.xcframework", + CROSS_BUILD_DIR / context.platform / "testbed", + ]) # Build the final archive archive_name = ( @@ -757,7 +764,7 @@ def build(context: argparse.Namespace, host: str | None = None) -> None: package(context) -def test(context: argparse.Namespace, host: str | None = None) -> None: +def test(context: argparse.Namespace, host: str | None = None) -> None: # noqa: PT028 """The implementation of the "test" command.""" if host is None: host = context.host @@ -795,18 +802,16 @@ def test(context: argparse.Namespace, host: str | None = None) -> None: / f"Frameworks/{apple_multiarch(host)}" ) - run( - [ - sys.executable, - "Apple/testbed", - "clone", - "--platform", - context.platform, - "--framework", - framework_path, - testbed_dir, - ] - ) + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + framework_path, + testbed_dir, + ]) run( [ @@ -840,7 +845,7 @@ def apple_sim_host(platform_name: str) -> str: """Determine the native simulator 
target for this platform.""" for _, slice_parts in HOSTS[platform_name].items(): for host_triple in slice_parts: - parts = host_triple.split('-') + parts = host_triple.split("-") if parts[0] == platform.machine() and parts[-1] == "simulator": return host_triple @@ -968,20 +973,29 @@ def parse_args() -> argparse.Namespace: cmd.add_argument( "--simulator", help=( - "The name of the simulator to use (eg: 'iPhone 16e'). Defaults to " - "the most recently released 'entry level' iPhone device. Device " - "architecture and OS version can also be specified; e.g., " - "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would run on " - "an ARM64 iPhone 16 Pro simulator running iOS 26.0." + "The name of the simulator to use (eg: 'iPhone 16e'). " + "Defaults to the most recently released 'entry level' " + "iPhone device. Device architecture and OS version can also " + "be specified; e.g., " + "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would " + "run on an ARM64 iPhone 16 Pro simulator running iOS 26.0." 
), ) group = cmd.add_mutually_exclusive_group() group.add_argument( - "--fast-ci", action="store_const", dest="ci_mode", const="fast", - help="Add test arguments for GitHub Actions") + "--fast-ci", + action="store_const", + dest="ci_mode", + const="fast", + help="Add test arguments for GitHub Actions", + ) group.add_argument( - "--slow-ci", action="store_const", dest="ci_mode", const="slow", - help="Add test arguments for buildbots") + "--slow-ci", + action="store_const", + dest="ci_mode", + const="slow", + help="Add test arguments for buildbots", + ) for subcommand in [configure_build, configure_host, build, ci]: subcommand.add_argument( diff --git a/Apple/testbed/Python.xcframework/build/utils.sh b/Apple/testbed/Python.xcframework/build/utils.sh index 961c46d014b5f5..e7155d8b30e213 100755 --- a/Apple/testbed/Python.xcframework/build/utils.sh +++ b/Apple/testbed/Python.xcframework/build/utils.sh @@ -46,7 +46,8 @@ install_stdlib() { rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" rsync -au "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib-$ARCHS/" "$CODESIGNING_FOLDER_PATH/python/lib/" else - rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" + # A single-arch framework will have a libpython symlink; that can't be included at runtime + rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" --exclude 'libpython*.dylib' fi } diff --git a/Apple/testbed/__main__.py b/Apple/testbed/__main__.py index 49974cb142853c..0dd77ab8b82797 100644 --- a/Apple/testbed/__main__.py +++ b/Apple/testbed/__main__.py @@ -32,15 +32,15 @@ def select_simulator_device(platform): json_data = json.loads(raw_json) if platform == "iOS": - # Any iOS device will do; we'll look for "SE" devices - but the name isn't - # consistent over time. 
Older Xcode versions will use "iPhone SE (Nth - # generation)"; As of 2025, they've started using "iPhone 16e". + # Any iOS device will do; we'll look for "SE" devices - but the name + # isn't consistent over time. Older Xcode versions will use "iPhone SE + # (Nth generation)"; As of 2025, they've started using "iPhone 16e". # - # When Xcode is updated after a new release, new devices will be available - # and old ones will be dropped from the set available on the latest iOS - # version. Select the one with the highest minimum runtime version - this - # is an indicator of the "newest" released device, which should always be - # supported on the "most recent" iOS version. + # When Xcode is updated after a new release, new devices will be + # available and old ones will be dropped from the set available on the + # latest iOS version. Select the one with the highest minimum runtime + # version - this is an indicator of the "newest" released device, which + # should always be supported on the "most recent" iOS version. se_simulators = sorted( (devicetype["minRuntimeVersion"], devicetype["name"]) for devicetype in json_data["devicetypes"] @@ -295,7 +295,8 @@ def main(): parser = argparse.ArgumentParser( description=( - "Manages the process of testing an Apple Python project through Xcode." + "Manages the process of testing an Apple Python project " + "through Xcode." ), ) @@ -336,7 +337,10 @@ def main(): run = subcommands.add_parser( "run", - usage="%(prog)s [-h] [--simulator SIMULATOR] -- [ ...]", + usage=( + "%(prog)s [-h] [--simulator SIMULATOR] -- " + " [ ...]" + ), description=( "Run a testbed project. The arguments provided after `--` will be " "passed to the running iOS process as if they were arguments to " @@ -397,9 +401,9 @@ def main(): / "bin" ).is_dir(): print( - f"Testbed does not contain a compiled Python framework. Use " - f"`python {sys.argv[0]} clone ...` to create a runnable " - f"clone of this testbed." 
+ "Testbed does not contain a compiled Python framework. " + f"Use `python {sys.argv[0]} clone ...` to create a " + "runnable clone of this testbed." ) sys.exit(20) @@ -411,7 +415,8 @@ def main(): ) else: print( - f"Must specify test arguments (e.g., {sys.argv[0]} run -- test)" + "Must specify test arguments " + f"(e.g., {sys.argv[0]} run -- test)" ) print() parser.print_help(sys.stderr) diff --git a/Doc/about.rst b/Doc/about.rst index 8f635d7f743a98..5c1b497ca6bcea 100644 --- a/Doc/about.rst +++ b/Doc/about.rst @@ -32,8 +32,9 @@ Contributors to the Python documentation ---------------------------------------- Many people have contributed to the Python language, the Python standard -library, and the Python documentation. See :source:`Misc/ACKS` in the Python -source distribution for a partial list of contributors. +library, and the Python documentation. See the `CPython +GitHub repository `__ +for a partial list of contributors. It is only with the input and contributions of the Python community that Python has such wonderful documentation -- Thank You! diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index a18bbf4e0e37d7..96078d22710527 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -162,16 +162,33 @@ The following functions provide locale-independent string to number conversions. .. versionadded:: 3.1 -.. c:function:: int PyOS_stricmp(const char *s1, const char *s2) +.. c:function:: int PyOS_mystricmp(const char *str1, const char *str2) + int PyOS_mystrnicmp(const char *str1, const char *str2, Py_ssize_t size) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strcmp` except that it ignores the case. + Case insensitive comparison of strings. These functions work almost + identically to :c:func:`!strcmp` and :c:func:`!strncmp` (respectively), + except that they ignore the case of ASCII characters. 
+ Return ``0`` if the strings are equal, a negative value if *str1* sorts + lexicographically before *str2*, or a positive value if it sorts after. -.. c:function:: int PyOS_strnicmp(const char *s1, const char *s2, Py_ssize_t size) + In the *str1* or *str2* arguments, a NUL byte marks the end of the string. + For :c:func:`!PyOS_mystrnicmp`, the *size* argument gives the maximum size + of the string, as if NUL was present at the index given by *size*. - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strncmp` except that it ignores the case. + These functions do not use the locale. + + +.. c:function:: int PyOS_stricmp(const char *str1, const char *str2) + int PyOS_strnicmp(const char *str1, const char *str2, Py_ssize_t size) + + Case insensitive comparison of strings. + + On Windows, these are aliases of :c:func:`!stricmp` and :c:func:`!strnicmp`, + respectively. + + On other platforms, they are aliases of :c:func:`PyOS_mystricmp` and + :c:func:`PyOS_mystrnicmp`, respectively. Character classification and conversion diff --git a/Doc/c-api/datetime.rst b/Doc/c-api/datetime.rst index f311aad5f15499..127d7c9c91a3d5 100644 --- a/Doc/c-api/datetime.rst +++ b/Doc/c-api/datetime.rst @@ -8,11 +8,42 @@ DateTime Objects Various date and time objects are supplied by the :mod:`datetime` module. Before using any of these functions, the header file :file:`datetime.h` must be included in your source (note that this is not included by :file:`Python.h`), -and the macro :c:macro:`!PyDateTime_IMPORT` must be invoked, usually as part of +and the macro :c:macro:`PyDateTime_IMPORT` must be invoked, usually as part of the module initialisation function. The macro puts a pointer to a C structure -into a static variable, :c:data:`!PyDateTimeAPI`, that is used by the following +into a static variable, :c:data:`PyDateTimeAPI`, that is used by the following macros. +.. c:macro:: PyDateTime_IMPORT() + + Import the datetime C API. 
+ + On success, populate the :c:var:`PyDateTimeAPI` pointer. + On failure, set :c:var:`PyDateTimeAPI` to ``NULL`` and set an exception. + The caller must check if an error occurred via :c:func:`PyErr_Occurred`: + + .. code-block:: + + PyDateTime_IMPORT; + if (PyErr_Occurred()) { /* cleanup */ } + + .. warning:: + + This is not compatible with subinterpreters. + +.. c:type:: PyDateTime_CAPI + + Structure containing the fields for the datetime C API. + + The fields of this structure are private and subject to change. + + Do not use this directly; prefer ``PyDateTime_*`` APIs instead. + +.. c:var:: PyDateTime_CAPI *PyDateTimeAPI + + Dynamically allocated object containing the datetime C API. + + This variable is only available once :c:macro:`PyDateTime_IMPORT` succeeds. + .. c:type:: PyDateTime_Date This subtype of :c:type:`PyObject` represents a Python date object. @@ -325,3 +356,16 @@ Macros for the convenience of modules implementing the DB API: Create and return a new :class:`datetime.date` object given an argument tuple suitable for passing to :meth:`datetime.date.fromtimestamp`. + + +Internal data +------------- + +The following symbols are exposed by the C API but should be considered +internal-only. + +.. c:macro:: PyDateTime_CAPSULE_NAME + + Name of the datetime capsule to pass to :c:func:`PyCapsule_Import`. + + Internal usage only. Use :c:macro:`PyDateTime_IMPORT` instead. diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst index 3d331e6ec12f76..0ce173b4bfea7c 100644 --- a/Doc/c-api/extension-modules.rst +++ b/Doc/c-api/extension-modules.rst @@ -8,7 +8,8 @@ Defining extension modules A C extension for CPython is a shared library (for example, a ``.so`` file on Linux, ``.pyd`` DLL on Windows), which is loadable into the Python process (for example, it is compiled with compatible compiler settings), and which -exports an :ref:`initialization function `. 
+exports an :dfn:`export hook` function (or an +old-style :ref:`initialization function `). To be importable by default (that is, by :py:class:`importlib.machinery.ExtensionFileLoader`), @@ -23,25 +24,127 @@ and must be named after the module name plus an extension listed in One suitable tool is Setuptools, whose documentation can be found at https://setuptools.pypa.io/en/latest/setuptools.html. -Normally, the initialization function returns a module definition initialized -using :c:func:`PyModuleDef_Init`. -This allows splitting the creation process into several phases: +.. _extension-export-hook: + +Extension export hook +..................... + +.. versionadded:: next + + Support for the :samp:`PyModExport_{}` export hook was added in Python + 3.15. The older way of defining modules is still available: consult either + the :ref:`extension-pyinit` section or earlier versions of this + documentation if you plan to support earlier Python versions. + +The export hook must be an exported function with the following signature: + +.. c:function:: PyModuleDef_Slot *PyModExport_modulename(void) + +For modules with ASCII-only names, the :ref:`export hook ` +must be named :samp:`PyModExport_{}`, +with ```` replaced by the module's name. + +For non-ASCII module names, the export hook must instead be named +:samp:`PyModExportU_{}` (note the ``U``), with ```` encoded using +Python's *punycode* encoding with hyphens replaced by underscores. In Python: + +.. code-block:: python + + def hook_name(name): + try: + suffix = b'_' + name.encode('ascii') + except UnicodeEncodeError: + suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') + return b'PyModExport' + suffix + +The export hook returns an array of :c:type:`PyModuleDef_Slot` entries, +terminated by an entry with a slot ID of ``0``. +These slots describe how the module should be created and initialized. + +This array must remain valid and constant until interpreter shutdown. 
+Typically, it should use ``static`` storage. +Prefer using the :c:macro:`Py_mod_create` and :c:macro:`Py_mod_exec` slots +for any dynamic behavior. + +The export hook may return ``NULL`` with an exception set to signal failure. + +It is recommended to define the export hook function using a helper macro: + +.. c:macro:: PyMODEXPORT_FUNC + + Declare an extension module export hook. + This macro: + + * specifies the :c:expr:`PyModuleDef_Slot*` return type, + * adds any special linkage declarations required by the platform, and + * for C++, declares the function as ``extern "C"``. +For example, a module called ``spam`` would be defined like this:: + + PyABIInfo_VAR(abi_info); + + static PyModuleDef_Slot spam_slots[] = { + {Py_mod_abi, &abi_info}, + {Py_mod_name, "spam"}, + {Py_mod_init, spam_init_function}, + ... + {0, NULL}, + }; + + PyMODEXPORT_FUNC + PyModExport_spam(void) + { + return spam_slots; + } + +The export hook is typically the only non-\ ``static`` +item defined in the module's C source. + +The hook should be kept short -- ideally, one line as above. +If you do need to use Python C API in this function, it is recommended to call +``PyABIInfo_Check(&abi_info, "modulename")`` first to raise an exception, +rather than crash, in common cases of ABI mismatch. + + +.. note:: + + It is possible to export multiple modules from a single shared library by + defining multiple export hooks. + However, importing them requires a custom importer or suitably named + copies/links of the extension file, because Python's import machinery only + finds the function corresponding to the filename. + See the `Multiple modules in one library `__ + section in :pep:`489` for details. + + +.. _multi-phase-initialization: + +Multi-phase initialization +.......................... + +The process of creating an extension module follows several phases: + +- Python finds and calls the export hook to get information on how to + create the module. 
- Before any substantial code is executed, Python can determine which capabilities the module supports, and it can adjust the environment or refuse loading an incompatible extension. -- By default, Python itself creates the module object -- that is, it does - the equivalent of :py:meth:`object.__new__` for classes. - It also sets initial attributes like :attr:`~module.__package__` and - :attr:`~module.__loader__`. -- Afterwards, the module object is initialized using extension-specific - code -- the equivalent of :py:meth:`~object.__init__` on classes. + Slots like :c:data:`Py_mod_abi`, :c:data:`Py_mod_gil` and + :c:data:`Py_mod_multiple_interpreters` influence this step. +- By default, Python itself then creates the module object -- that is, it does + the equivalent of calling :py:meth:`~object.__new__` when creating an object. + This step can be overridden using the :c:data:`Py_mod_create` slot. +- Python sets initial module attributes like :attr:`~module.__package__` and + :attr:`~module.__loader__`, and inserts the module object into + :py:attr:`sys.modules`. +- Afterwards, the module object is initialized in an extension-specific way + -- the equivalent of :py:meth:`~object.__init__` when creating an object, + or of executing top-level code in a Python-language module. + The behavior is specified using the :c:data:`Py_mod_exec` slot. This is called *multi-phase initialization* to distinguish it from the legacy -(but still supported) *single-phase initialization* scheme, -where the initialization function returns a fully constructed module. -See the :ref:`single-phase-initialization section below ` -for details. +(but still supported) :ref:`single-phase initialization `, +where an initialization function returns a fully constructed module. .. versionchanged:: 3.5 @@ -53,7 +156,7 @@ Multiple module instances By default, extension modules are not singletons. 
For example, if the :py:attr:`sys.modules` entry is removed and the module -is re-imported, a new module object is created, and typically populated with +is re-imported, a new module object is created and, typically, populated with fresh method and type objects. The old module is subject to normal garbage collection. This mirrors the behavior of pure-Python modules. @@ -83,36 +186,34 @@ A module may also be limited to the main interpreter using the :c:data:`Py_mod_multiple_interpreters` slot. -.. _extension-export-hook: +.. _extension-pyinit: -Initialization function -....................... +``PyInit`` function +................... -The initialization function defined by an extension module has the -following signature: +.. deprecated:: next + + This functionality is :term:`soft deprecated`. + It will not get new features, but there are no plans to remove it. + +Instead of :c:func:`PyModExport_modulename`, an extension module can define +an older-style :dfn:`initialization function` with the signature: .. c:function:: PyObject* PyInit_modulename(void) Its name should be :samp:`PyInit_{}`, with ```` replaced by the name of the module. +For non-ASCII module names, use :samp:`PyInitU_{}` instead, with +```` encoded in the same way as for the +:ref:`export hook ` (that is, using Punycode +with underscores). -For modules with ASCII-only names, the function must instead be named -:samp:`PyInit_{}`, with ```` replaced by the name of the module. -When using :ref:`multi-phase-initialization`, non-ASCII module names -are allowed. In this case, the initialization function name is -:samp:`PyInitU_{}`, with ```` encoded using Python's -*punycode* encoding with hyphens replaced by underscores. In Python: +If a module exports both :samp:`PyInit_{}` and +:samp:`PyModExport_{}`, the :samp:`PyInit_{}` function +is ignored. -.. 
code-block:: python - - def initfunc_name(name): - try: - suffix = b'_' + name.encode('ascii') - except UnicodeEncodeError: - suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') - return b'PyInit' + suffix - -It is recommended to define the initialization function using a helper macro: +Like with :c:macro:`PyMODEXPORT_FUNC`, it is recommended to define the +initialization function using a helper macro: .. c:macro:: PyMODINIT_FUNC @@ -123,43 +224,15 @@ It is recommended to define the initialization function using a helper macro: * adds any special linkage declarations required by the platform, and * for C++, declares the function as ``extern "C"``. -For example, a module called ``spam`` would be defined like this:: - - static struct PyModuleDef spam_module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "spam", - ... - }; - - PyMODINIT_FUNC - PyInit_spam(void) - { - return PyModuleDef_Init(&spam_module); - } - -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the `Multiple modules in one library `__ -section in :pep:`489` for details. - -The initialization function is typically the only non-\ ``static`` -item defined in the module's C source. +Normally, the initialization function (``PyInit_modulename``) returns +a :c:type:`PyModuleDef` instance with non-``NULL`` +:c:member:`~PyModuleDef.m_slots`. This allows Python to use +:ref:`multi-phase initialization `. -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -Normally, the :ref:`initialization function ` -(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with -non-``NULL`` :c:member:`~PyModuleDef.m_slots`. Before it is returned, the ``PyModuleDef`` instance must be initialized using the following function: - .. 
c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) Ensure a module definition is a properly initialized Python object that @@ -167,7 +240,8 @@ using the following function: Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. - Calling this function is required for :ref:`multi-phase-initialization`. + Calling this function is required before returning a :c:type:`PyModuleDef` + from a module initialization function. It should not be used in other contexts. Note that Python assumes that ``PyModuleDef`` structures are statically @@ -178,18 +252,37 @@ using the following function: .. versionadded:: 3.5 +For example, a module called ``spam`` would be defined like this:: + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + ... + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); + } + + .. _single-phase-initialization: Legacy single-phase initialization -.................................. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. attention:: - Single-phase initialization is a legacy mechanism to initialize extension +.. deprecated:: next + + Single-phase initialization is :term:`soft deprecated`. + It is a legacy mechanism to initialize extension modules, with known drawbacks and design flaws. Extension module authors are encouraged to use multi-phase initialization instead. -In single-phase initialization, the -:ref:`initialization function ` (``PyInit_modulename``) + However, there are no plans to remove support for it. + +In single-phase initialization, the old-style +:ref:`initialization function ` (``PyInit_modulename``) should create, populate and return a module object. This is typically done using :c:func:`PyModule_Create` and functions like :c:func:`PyModule_AddObjectRef`. @@ -242,6 +335,8 @@ in the following ways: * Single-phase modules support module lookup functions like :c:func:`PyState_FindModule`. 
+* The module's :c:member:`PyModuleDef.m_slots` must be NULL. + .. [#testsinglephase] ``_testsinglephase`` is an internal module used in CPython's self-test suite; your installation may or may not include it. diff --git a/Doc/c-api/gen.rst b/Doc/c-api/gen.rst index 0eb5922f6da75f..44f3bdbf959b9c 100644 --- a/Doc/c-api/gen.rst +++ b/Doc/c-api/gen.rst @@ -44,3 +44,41 @@ than explicitly calling :c:func:`PyGen_New` or :c:func:`PyGen_NewWithQualName`. with ``__name__`` and ``__qualname__`` set to *name* and *qualname*. A reference to *frame* is stolen by this function. The *frame* argument must not be ``NULL``. + +.. c:function:: PyCodeObject* PyGen_GetCode(PyGenObject *gen) + + Return a new :term:`strong reference` to the code object wrapped by *gen*. + This function always succeeds. + + +Asynchronous Generator Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. seealso:: + :pep:`525` + +.. c:var:: PyTypeObject PyAsyncGen_Type + + The type object corresponding to asynchronous generator objects. This is + available as :class:`types.AsyncGeneratorType` in the Python layer. + + .. versionadded:: 3.6 + +.. c:function:: PyObject *PyAsyncGen_New(PyFrameObject *frame, PyObject *name, PyObject *qualname) + + Create a new asynchronous generator wrapping *frame*, with ``__name__`` and + ``__qualname__`` set to *name* and *qualname*. *frame* is stolen by this + function and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to the + new asynchronous generator. On failure, this function returns ``NULL`` + with an exception set. + + .. versionadded:: 3.6 + +.. c:function:: int PyAsyncGen_CheckExact(PyObject *op) + + Return true if *op* is an asynchronous generator object, false otherwise. + This function always succeeds. + + .. versionadded:: 3.6 diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 24e673d3d1394f..1786ac6b503895 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -314,6 +314,13 @@ Importing Modules initialization. 
+.. c:var:: struct _inittab *PyImport_Inittab + + The table of built-in modules used by Python initialization. Do not use this directly; + use :c:func:`PyImport_AppendInittab` and :c:func:`PyImport_ExtendInittab` + instead. + + .. c:function:: PyObject* PyImport_ImportModuleAttr(PyObject *mod_name, PyObject *attr_name) Import the module *mod_name* and get its attribute *attr_name*. @@ -346,11 +353,11 @@ Importing Modules the same as for :c:func:`PyImport_AppendInittab`. On success, create and return a module object. - This module will not be initialized; call :c:func:`!PyModule_Exec` + This module will not be initialized; call :c:func:`PyModule_Exec` to initialize it. (Custom importers should do this in their :py:meth:`~importlib.abc.Loader.exec_module` method.) On error, return NULL with an exception set. - .. versionadded:: next + .. versionadded:: 3.15 diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 18ee16118070eb..7411644f9e110b 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1390,7 +1390,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. See :c:func:`PyUnstable_ThreadState_ResetStackProtection` for undoing this operation. - .. versionadded:: next + .. versionadded:: 3.15 .. c:function:: void PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) @@ -1400,7 +1400,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. See :c:func:`PyUnstable_ThreadState_SetStackProtection` for an explanation. - .. versionadded:: next + .. versionadded:: 3.15 .. c:function:: PyInterpreterState* PyInterpreterState_Get(void) @@ -1717,7 +1717,8 @@ function. You can create and destroy them using the following functions: Only C-level static and global variables are shared between these module objects. - * For modules using single-phase initialization, + * For modules using legacy + :ref:`single-phase initialization `, e.g. 
:c:func:`PyModule_Create`, the first time a particular extension is imported, it is initialized normally, and a (shallow) copy of its module's dictionary is squirreled away. @@ -1891,6 +1892,25 @@ pointer and a void pointer argument. This function now always schedules *func* to be run in the main interpreter. + +.. c:function:: int Py_MakePendingCalls(void) + + Execute all pending calls. This is usually executed automatically by the + interpreter. + + This function returns ``0`` on success, and returns ``-1`` with an exception + set on failure. + + If this is not called in the main thread of the main + interpreter, this function does nothing and returns ``0``. + The caller must hold an :term:`attached thread state`. + + .. versionadded:: 3.1 + + .. versionchanged:: 3.12 + This function only runs pending calls in the main interpreter. + + .. _profiling: Profiling and Tracing @@ -2520,3 +2540,220 @@ code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. In the default build, this macro expands to ``}``. .. versionadded:: 3.13 + + +Legacy Locking APIs +------------------- + +These APIs are obsolete since Python 3.13 with the introduction of +:c:type:`PyMutex`. + +.. versionchanged:: 3.15 + These APIs are now a simple wrapper around ``PyMutex``. + + +.. c:type:: PyThread_type_lock + + A pointer to a mutual exclusion lock. + + +.. c:type:: PyLockStatus + + The result of acquiring a lock with a timeout. + + .. c:namespace:: NULL + + .. c:enumerator:: PY_LOCK_FAILURE + + Failed to acquire the lock. + + .. c:enumerator:: PY_LOCK_ACQUIRED + + The lock was successfully acquired. + + .. c:enumerator:: PY_LOCK_INTR + + The lock was interrupted by a signal. + + +.. c:function:: PyThread_type_lock PyThread_allocate_lock(void) + + Allocate a new lock. + + On success, this function returns a lock; on failure, this + function returns ``0`` without an exception set. + + The caller does not need to hold an :term:`attached thread state`. + + .. 
versionchanged:: 3.15 + This function now always uses :c:type:`PyMutex`. In prior versions, this + would use a lock provided by the operating system. + + +.. c:function:: void PyThread_free_lock(PyThread_type_lock lock) + + Destroy *lock*. The lock should not be held by any thread when calling + this. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock lock, long long microseconds, int intr_flag) + + Acquire *lock* with a timeout. + + This will wait for *microseconds* microseconds to acquire the lock. If the + timeout expires, this function returns :c:enumerator:`PY_LOCK_FAILURE`. + If *microseconds* is ``-1``, this will wait indefinitely until the lock has + been released. + + If *intr_flag* is ``1``, acquiring the lock may be interrupted by a signal, + in which case this function returns :c:enumerator:`PY_LOCK_INTR`. Upon + interruption, it's generally expected that the caller makes a call to + :c:func:`Py_MakePendingCalls` to propagate an exception to Python code. + + If the lock is successfully acquired, this function returns + :c:enumerator:`PY_LOCK_ACQUIRED`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) + + Acquire *lock*. + + If *waitflag* is ``1`` and another thread currently holds the lock, this + function will wait until the lock can be acquired and will always return + ``1``. + + If *waitflag* is ``0`` and another thread holds the lock, this function will + not wait and instead return ``0``. If the lock is not held by any other + thread, then this function will acquire it and return ``1``. + + Unlike :c:func:`PyThread_acquire_lock_timed`, acquiring the lock cannot be + interrupted by a signal. + + The caller does not need to hold an :term:`attached thread state`. + + +.. 
c:function:: int PyThread_release_lock(PyThread_type_lock lock) + + Release *lock*. If *lock* is not held, then this function issues a + fatal error. + + The caller does not need to hold an :term:`attached thread state`. + + +Operating System Thread APIs +============================ + +.. c:macro:: PYTHREAD_INVALID_THREAD_ID + + Sentinel value for an invalid thread ID. + + This is currently equivalent to ``(unsigned long)-1``. + + +.. c:function:: unsigned long PyThread_start_new_thread(void (*func)(void *), void *arg) + + Start function *func* in a new thread with argument *arg*. + The resulting thread is not intended to be joined. + + *func* must not be ``NULL``, but *arg* may be ``NULL``. + + On success, this function returns the identifier of the new thread; on failure, + this returns :c:macro:`PYTHREAD_INVALID_THREAD_ID`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: unsigned long PyThread_get_thread_ident(void) + + Return the identifier of the current thread, which will never be zero. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_ident` + + +.. c:function:: PyObject *PyThread_GetInfo(void) + + Get general information about the current thread in the form of a + :ref:`struct sequence ` object. This information is + accessible as :py:attr:`sys.thread_info` in Python. + + On success, this returns a new :term:`strong reference` to the thread + information; on failure, this returns ``NULL`` with an exception set. + + The caller must hold an :term:`attached thread state`. + + +.. c:macro:: PY_HAVE_THREAD_NATIVE_ID + + This macro is defined when the system supports native thread IDs. + + +.. c:function:: unsigned long PyThread_get_thread_native_id(void) + + Get the native identifier of the current thread as it was assigned by the operating + system's kernel, which will never be less than zero. 
+ + This function is only available when :c:macro:`PY_HAVE_THREAD_NATIVE_ID` is + defined. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_native_id` + + +.. c:function:: void PyThread_exit_thread(void) + + Terminate the current thread. This function is generally considered unsafe + and should be avoided. It is kept solely for backwards compatibility. + + This function is only safe to call if all functions in the full call + stack are written to safely allow it. + + .. warning:: + + If the current system uses POSIX threads (also known as "pthreads"), + this calls :manpage:`pthread_exit(3)`, which attempts to unwind the stack + and call C++ destructors on some libc implementations. However, if a + ``noexcept`` function is reached, it may terminate the process. + Other systems, such as macOS, do unwinding. + + On Windows, this function calls ``_endthreadex()``, which kills the thread + without calling C++ destructors. + + In any case, there is a risk of corruption on the thread's stack. + + .. deprecated:: 3.14 + + +.. c:function:: void PyThread_init_thread(void) + + Initialize ``PyThread*`` APIs. Python executes this function automatically, + so there's little need to call it from an extension module. + + +.. c:function:: int PyThread_set_stacksize(size_t size) + + Set the stack size of the current thread to *size* bytes. + + This function returns ``0`` on success, ``-1`` if *size* is invalid, or + ``-2`` if the system does not support changing the stack size. This function + does not set exceptions. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: size_t PyThread_get_stacksize(void) + + Return the stack size of the current thread in bytes, or ``0`` if the system's + default stack size is in use. + + The caller does not need to hold an :term:`attached thread state`. 
diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index bace21b7981091..bb94bcb86a7899 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -171,6 +171,17 @@ complete listing. Like ``getenv(s)``, but returns ``NULL`` if :option:`-E` was passed on the command line (see :c:member:`PyConfig.use_environment`). +.. c:macro:: Py_LOCAL(type) + + Declare a function returning the specified *type* using a fast-calling + qualifier for functions that are local to the current file. + Semantically, this is equivalent to ``static type``. + +.. c:macro:: Py_LOCAL_INLINE(type) + + Equivalent to :c:macro:`Py_LOCAL` but additionally requests the function + be inlined. + .. c:macro:: Py_MAX(x, y) Return the maximum value between ``x`` and ``y``. @@ -311,6 +322,19 @@ complete listing. PyDoc_VAR(python_doc) = PyDoc_STR("A genus of constricting snakes in the Pythonidae family native " "to the tropics and subtropics of the Eastern Hemisphere."); +.. c:macro:: Py_ARRAY_LENGTH(array) + + Compute the length of a statically allocated C array at compile time. + + The *array* argument must be a C array with a size known at compile time. + Passing an array with an unknown size, such as a heap-allocated array, + will result in a compilation error on some compilers, or otherwise produce + incorrect results. + + This is roughly equivalent to:: + + sizeof(array) / sizeof((array)[0]) + .. _api-objects: diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index 1994a3c7d01ca7..a12f6331c85912 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -3,11 +3,10 @@ .. _moduleobjects: Module Objects --------------- +============== .. index:: pair: object; module - .. c:var:: PyTypeObject PyModule_Type .. index:: single: ModuleType (in module types) @@ -97,13 +96,6 @@ Module Objects Note that Python code may rename a module by setting its :py:attr:`~module.__name__` attribute. -.. 
c:function:: void* PyModule_GetState(PyObject *module) - - Return the "state" of the module, that is, a pointer to the block of memory - allocated at module creation time, or ``NULL``. See - :c:member:`PyModuleDef.m_size`. - - .. c:function:: PyModuleDef* PyModule_GetDef(PyObject *module) Return a pointer to the :c:type:`PyModuleDef` struct from which the module was @@ -141,180 +133,188 @@ Module Objects unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead. -.. _pymoduledef: +.. _pymoduledef_slot: -Module definitions ------------------- +Module definition +----------------- -The functions in the previous section work on any module object, including -modules imported from Python code. +Modules created using the C API are typically defined using an +array of :dfn:`slots`. +The slots provide a "description" of how a module should be created. -Modules defined using the C API typically use a *module definition*, -:c:type:`PyModuleDef` -- a statically allocated, constant “description" of -how a module should be created. +.. versionchanged:: next -The definition is usually used to define an extension's “main” module object -(see :ref:`extension-modules` for details). -It is also used to -:ref:`create extension modules dynamically `. + Previously, a :c:type:`PyModuleDef` struct was necessary to define modules. + The older way of defining modules is still available: consult either the + :ref:`pymoduledef` section or earlier versions of this documentation + if you plan to support earlier Python versions. -Unlike :c:func:`PyModule_New`, the definition allows management of -*module state* -- a piece of memory that is allocated and cleared together -with the module object. -Unlike the module's Python attributes, Python code cannot replace or delete -data stored in module state. +The slots array is usually used to define an extension module's “main” +module object (see :ref:`extension-modules` for details). 
+It can also be used to +:ref:`create extension modules dynamically `. -.. c:type:: PyModuleDef +Unless specified otherwise, the same slot ID may not be repeated +in an array of slots. - The module definition struct, which holds all information needed to create - a module object. - This structure must be statically allocated (or be otherwise guaranteed - to be valid while any modules created from it exist). - Usually, there is only one variable of this type for each extension module. - .. c:member:: PyModuleDef_Base m_base +.. c:type:: PyModuleDef_Slot - Always initialize this member to :c:macro:`PyModuleDef_HEAD_INIT`. + .. c:member:: int slot - .. c:member:: const char *m_name + A slot ID, chosen from the available ``Py_mod_*`` values explained below. - Name for the new module. + An ID of 0 marks the end of a :c:type:`!PyModuleDef_Slot` array. - .. c:member:: const char *m_doc + .. c:member:: void* value - Docstring for the module; usually a docstring variable created with - :c:macro:`PyDoc_STRVAR` is used. + Value of the slot, whose meaning depends on the slot ID. - .. c:member:: Py_ssize_t m_size + The value may not be NULL. + To leave a slot out, omit the :c:type:`PyModuleDef_Slot` entry entirely. - Module state may be kept in a per-module memory area that can be - retrieved with :c:func:`PyModule_GetState`, rather than in static globals. - This makes modules safe for use in multiple sub-interpreters. + .. versionadded:: 3.5 - This memory area is allocated based on *m_size* on module creation, - and freed when the module object is deallocated, after the - :c:member:`~PyModuleDef.m_free` function has been called, if present. - Setting it to a non-negative value means that the module can be - re-initialized and specifies the additional amount of memory it requires - for its state. +Metadata slots +.............. - Setting ``m_size`` to ``-1`` means that the module does not support - sub-interpreters, because it has global state. 
- Negative ``m_size`` is only allowed when using - :ref:`legacy single-phase initialization ` - or when :ref:`creating modules dynamically `. +.. c:macro:: Py_mod_name - See :PEP:`3121` for more details. + :c:type:`Slot ID ` for the name of the new module, + as a NUL-terminated UTF8-encoded ``const char *``. - .. c:member:: PyMethodDef* m_methods + Note that modules are typically created using a + :py:class:`~importlib.machinery.ModuleSpec`, and when they are, the + name from the spec will be used instead of :c:data:`!Py_mod_name`. + However, it is still recommended to include this slot for introspection + and debugging purposes. - A pointer to a table of module-level functions, described by - :c:type:`PyMethodDef` values. Can be ``NULL`` if no functions are present. + .. versionadded:: next - .. c:member:: PyModuleDef_Slot* m_slots + Use :c:member:`PyModuleDef.m_name` instead to support previous versions. - An array of slot definitions for multi-phase initialization, terminated by - a ``{0, NULL}`` entry. - When using legacy single-phase initialization, *m_slots* must be ``NULL``. +.. c:macro:: Py_mod_doc - .. versionchanged:: 3.5 + :c:type:`Slot ID ` for the docstring of the new + module, as a NUL-terminated UTF8-encoded ``const char *``. - Prior to version 3.5, this member was always set to ``NULL``, - and was defined as: + Usually it is set to a variable created with :c:macro:`PyDoc_STRVAR`. - .. c:member:: inquiry m_reload + .. versionadded:: next - .. c:member:: traverseproc m_traverse + Use :c:member:`PyModuleDef.m_doc` instead to support previous versions. - A traversal function to call during GC traversal of the module object, or - ``NULL`` if not needed. - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). 
More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. +Feature slots +............. - .. versionchanged:: 3.9 - No longer called before the module state is allocated. +.. c:macro:: Py_mod_abi - .. c:member:: inquiry m_clear + :c:type:`Slot ID ` whose value points to + a :c:struct:`PyABIInfo` structure describing the ABI that + the extension is using. - A clear function to call during GC clearing of the module object, or - ``NULL`` if not needed. + A suitable :c:struct:`!PyABIInfo` variable can be defined using the + :c:macro:`PyABIInfo_VAR` macro, as in: - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. + .. code-block:: c - Like :c:member:`PyTypeObject.tp_clear`, this function is not *always* - called before a module is deallocated. For example, when reference - counting is enough to determine that an object is no longer used, - the cyclic garbage collector is not involved and - :c:member:`~PyModuleDef.m_free` is called directly. + PyABIInfo_VAR(abi_info); - .. versionchanged:: 3.9 - No longer called before the module state is allocated. + static PyModuleDef_Slot mymodule_slots[] = { + {Py_mod_abi, &abi_info}, + ... + }; - .. c:member:: freefunc m_free + When creating a module, Python checks the value of this slot + using :c:func:`PyABIInfo_Check`. - A function to call during deallocation of the module object, or ``NULL`` - if not needed. + .. versionadded:: 3.15 - This function is not called if the module state was requested but is not - allocated yet. 
This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. +.. c:macro:: Py_mod_multiple_interpreters - .. versionchanged:: 3.9 - No longer called before the module state is allocated. + :c:type:`Slot ID ` whose value is one of: + .. c:namespace:: NULL -Module slots -............ + .. c:macro:: Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED -.. c:type:: PyModuleDef_Slot + The module does not support being imported in subinterpreters. - .. c:member:: int slot + .. c:macro:: Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED - A slot ID, chosen from the available values explained below. + The module supports being imported in subinterpreters, + but only when they share the main interpreter's GIL. + (See :ref:`isolating-extensions-howto`.) - .. c:member:: void* value + .. c:macro:: Py_MOD_PER_INTERPRETER_GIL_SUPPORTED - Value of the slot, whose meaning depends on the slot ID. + The module supports being imported in subinterpreters, + even when they have their own GIL. + (See :ref:`isolating-extensions-howto`.) - .. versionadded:: 3.5 + This slot determines whether or not importing this module + in a subinterpreter will fail. + + If ``Py_mod_multiple_interpreters`` is not specified, the import + machinery defaults to ``Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED``. + + .. versionadded:: 3.12 + +.. c:macro:: Py_mod_gil + + :c:type:`Slot ID ` whose value is one of: + + .. c:namespace:: NULL + + .. c:macro:: Py_MOD_GIL_USED + + The module depends on the presence of the global interpreter lock (GIL), + and may access global state without synchronization. + + .. c:macro:: Py_MOD_GIL_NOT_USED + + The module is safe to run without an active GIL. + + This slot is ignored by Python builds not configured with + :option:`--disable-gil`. 
Otherwise, it determines whether or not importing + this module will cause the GIL to be automatically enabled. See + :ref:`whatsnew313-free-threaded-cpython` for more detail. + + If ``Py_mod_gil`` is not specified, the import machinery defaults to + ``Py_MOD_GIL_USED``. + + .. versionadded:: 3.13 -The available slot types are: + +Creation and initialization slots +................................. .. c:macro:: Py_mod_create - Specifies a function that is called to create the module object itself. - The *value* pointer of this slot must point to a function of the signature: + :c:type:`Slot ID ` for a function that creates + the module object itself. + The function must have the signature: .. c:function:: PyObject* create_module(PyObject *spec, PyModuleDef *def) :no-index-entry: :no-contents-entry: - The function receives a :py:class:`~importlib.machinery.ModuleSpec` - instance, as defined in :PEP:`451`, and the module definition. - It should return a new module object, or set an error + The function will be called with: + + - *spec*: a ``ModuleSpec``-like object, meaning that any attributes defined + for :py:class:`importlib.machinery.ModuleSpec` have matching semantics. + However, any of the attributes may be missing. + - *def*: ``NULL``, or the module definition if the module is created from one. + + The function should return a new module object, or set an error and return ``NULL``. This function should be kept minimal. In particular, it should not call arbitrary Python code, as trying to import the same module again may result in an infinite loop. - Multiple ``Py_mod_create`` slots may not be specified in one module - definition. - If ``Py_mod_create`` is not specified, the import machinery will create a normal module object using :c:func:`PyModule_New`. 
The name is taken from *spec*, not the definition, to allow extension modules to dynamically adjust @@ -322,118 +322,442 @@ The available slot types are: names through symlinks, all while sharing a single module definition. There is no requirement for the returned object to be an instance of - :c:type:`PyModule_Type`. Any type can be used, as long as it supports - setting and getting import-related attributes. - However, only ``PyModule_Type`` instances may be returned if the - ``PyModuleDef`` has non-``NULL`` ``m_traverse``, ``m_clear``, - ``m_free``; non-zero ``m_size``; or slots other than ``Py_mod_create``. + :c:type:`PyModule_Type`. + However, some slots may only be used with + :c:type:`!PyModule_Type` instances; in particular: + + - :c:macro:`Py_mod_exec`, + - :ref:`module state slots ` (``Py_mod_state_*``), + - :c:macro:`Py_mod_token`. + + .. versionadded:: 3.5 + + .. versionchanged:: next + + The *spec* argument may be a ``ModuleSpec``-like object, rather than + a true :py:class:`~importlib.machinery.ModuleSpec` instance. + Note that previous versions of CPython did not enforce this. + + The *def* argument may now be ``NULL``, since modules are not necessarily + made from definitions. .. c:macro:: Py_mod_exec - Specifies a function that is called to *execute* the module. - This is equivalent to executing the code of a Python module: typically, - this function adds classes and constants to the module. + :c:type:`Slot ID ` for a function that will + :dfn:`execute`, or initialize, the module. + This function does the equivalent of executing the code of a Python module: + typically, it adds classes and constants to the module. The signature of the function is: .. c:function:: int exec_module(PyObject* module) :no-index-entry: :no-contents-entry: - If multiple ``Py_mod_exec`` slots are specified, they are processed in the - order they appear in the *m_slots* array. + See the :ref:`capi-module-support-functions` section for some useful + functions to call. -.. 
c:macro:: Py_mod_multiple_interpreters + For backwards compatibility, the :c:member:`PyModuleDef.m_slots` array may + contain multiple :c:macro:`!Py_mod_exec` slots; these are processed in the + order they appear in the array. + Elsewhere (that is, in arguments to :c:func:`PyModule_FromSlotsAndSpec` + and in return values of :samp:`PyModExport_{}`), repeating the slot + is not allowed. - Specifies one of the following values: + .. versionadded:: 3.5 - .. c:namespace:: NULL + .. versionchanged:: next - .. c:macro:: Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED + Repeated ``Py_mod_exec`` slots are disallowed, except in + :c:member:`PyModuleDef.m_slots`. - The module does not support being imported in subinterpreters. +.. c:macro:: Py_mod_methods - .. c:macro:: Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED + :c:type:`Slot ID ` for a table of module-level + functions, as an array of :c:type:`PyMethodDef` values suitable as the + *functions* argument to :c:func:`PyModule_AddFunctions`. - The module supports being imported in subinterpreters, - but only when they share the main interpreter's GIL. - (See :ref:`isolating-extensions-howto`.) + Like other slot IDs, a slots array may only contain one + :c:macro:`!Py_mod_methods` entry. + To add functions from multiple :c:type:`PyMethodDef` arrays, call + :c:func:`PyModule_AddFunctions` in the :c:macro:`Py_mod_exec` function. - .. c:macro:: Py_MOD_PER_INTERPRETER_GIL_SUPPORTED + The table must be statically allocated (or otherwise guaranteed to outlive + the module object). - The module supports being imported in subinterpreters, - even when they have their own GIL. - (See :ref:`isolating-extensions-howto`.) + .. versionadded:: next - This slot determines whether or not importing this module - in a subinterpreter will fail. + Use :c:member:`PyModuleDef.m_methods` instead to support previous versions. - Multiple ``Py_mod_multiple_interpreters`` slots may not be specified - in one module definition. +.. 
_ext-module-state: - If ``Py_mod_multiple_interpreters`` is not specified, the import - machinery defaults to ``Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED``. +Module state +------------ - .. versionadded:: 3.12 +Extension modules can have *module state* -- a +piece of memory that is allocated on module creation, +and freed when the module object is deallocated. +The module state is specified using :ref:`dedicated slots `. -.. c:macro:: Py_mod_gil +A typical use of module state is storing an exception type -- or indeed *any* +type object defined by the module. - Specifies one of the following values: +Unlike the module's Python attributes, Python code cannot replace or delete +data stored in module state. - .. c:namespace:: NULL +Keeping per-module information in attributes and module state, rather than in +static globals, makes module objects *isolated* and safer for use in +multiple sub-interpreters. +It also helps Python do an orderly clean-up when it shuts down. - .. c:macro:: Py_MOD_GIL_USED +Extensions that keep references to Python objects as part of module state must +implement :c:macro:`Py_mod_state_traverse` and :c:macro:`Py_mod_state_clear` +functions to avoid reference leaks. - The module depends on the presence of the global interpreter lock (GIL), - and may access global state without synchronization. +To retrieve the state from a given module, use the following functions: - .. c:macro:: Py_MOD_GIL_NOT_USED +.. c:function:: void* PyModule_GetState(PyObject *module) - The module is safe to run without an active GIL. + Return the "state" of the module, that is, a pointer to the block of memory + allocated at module creation time, or ``NULL``. See + :c:macro:`Py_mod_state_size`. - This slot is ignored by Python builds not configured with - :option:`--disable-gil`. Otherwise, it determines whether or not importing - this module will cause the GIL to be automatically enabled. See - :ref:`whatsnew313-free-threaded-cpython` for more detail. 
+ On error, return ``NULL`` with an exception set. + Use :c:func:`PyErr_Occurred` to tell this case apart from missing + module state. - Multiple ``Py_mod_gil`` slots may not be specified in one module definition. - If ``Py_mod_gil`` is not specified, the import machinery defaults to - ``Py_MOD_GIL_USED``. +.. c:function:: int PyModule_GetStateSize(PyObject *, Py_ssize_t *result) - .. versionadded:: 3.13 + Set *\*result* to the size of the module's state, as specified using + :c:macro:`Py_mod_state_size` (or :c:member:`PyModuleDef.m_size`), + and return 0. -.. c:macro:: Py_mod_abi + On error, set *\*result* to -1, and return -1 with an exception set. - A pointer to a :c:struct:`PyABIInfo` structure that describes the ABI that - the extension is using. + .. versionadded:: next - When the module is loaded, the :c:struct:`!PyABIInfo` in this slot is checked - using :c:func:`PyABIInfo_Check`. - A suitable :c:struct:`!PyABIInfo` variable can be defined using the - :c:macro:`PyABIInfo_VAR` macro, as in: - .. code-block:: c +.. _ext-module-state-slots: - PyABIInfo_VAR(abi_info); +Slots for defining module state +............................... - static PyModuleDef_Slot mymodule_slots[] = { - {Py_mod_abi, &abi_info}, - ... - }; +The following :c:member:`PyModuleDef_Slot.slot` IDs are available for +defining the module state. - .. versionadded:: 3.15 +.. c:macro:: Py_mod_state_size + :c:type:`Slot ID ` for the size of the module state, + in bytes. -.. _moduledef-dynamic: + Setting the value to a non-negative value means that the module can be + re-initialized and specifies the additional amount of memory it requires + for its state. + + See :PEP:`3121` for more details. + + Use :c:func:`PyModule_GetStateSize` to retrieve the size of a given module. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_size` instead to support previous versions. + +.. 
c:macro:: Py_mod_state_traverse + + :c:type:`Slot ID <PyModuleDef_Slot.slot>` for a traversal function to call + during GC traversal of the module object. + + The signature of the function, and the meanings of its arguments, + are similar to those of :c:member:`PyTypeObject.tp_traverse`: + + .. c:function:: int traverse_module_state(PyObject *module, visitproc visit, void *arg) + :no-index-entry: + :no-contents-entry: + + This function is not called if the module state was requested but is not + allocated yet. This is the case immediately after the module is created + and before the module is executed (:c:data:`Py_mod_exec` function). More + precisely, this function is not called if the state size + (:c:data:`Py_mod_state_size`) is greater than 0 and the module state + (as returned by :c:func:`PyModule_GetState`) is ``NULL``. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_traverse` instead to support previous versions. + +.. c:macro:: Py_mod_state_clear + + :c:type:`Slot ID <PyModuleDef_Slot.slot>` for a clear function to call + during GC clearing of the module object. + + The signature of the function is: + + .. c:function:: int clear_module_state(PyObject* module) + :no-index-entry: + :no-contents-entry: + + This function is not called if the module state was requested but is not + allocated yet. This is the case immediately after the module is created + and before the module is executed (:c:data:`Py_mod_exec` function). More + precisely, this function is not called if the state size + (:c:data:`Py_mod_state_size`) is greater than 0 and the module state + (as returned by :c:func:`PyModule_GetState`) is ``NULL``. + + Like :c:member:`PyTypeObject.tp_clear`, this function is not *always* + called before a module is deallocated. For example, when reference + counting is enough to determine that an object is no longer used, + the cyclic garbage collector is not involved and + the :c:macro:`Py_mod_state_free` function is called directly. + + .. 
versionadded:: next + + Use :c:member:`PyModuleDef.m_clear` instead to support previous versions. + +.. c:macro:: Py_mod_state_free + + :c:type:`Slot ID <PyModuleDef_Slot.slot>` for a function to call during + deallocation of the module object. + + The signature of the function is: + + .. c:function:: void free_module_state(void *module) + :no-index-entry: + :no-contents-entry: + + This function is not called if the module state was requested but is not + allocated yet. This is the case immediately after the module is created + and before the module is executed (:c:data:`Py_mod_exec` function). More + precisely, this function is not called if the state size + (:c:data:`Py_mod_state_size`) is greater than 0 and the module state + (as returned by :c:func:`PyModule_GetState`) is ``NULL``. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_free` instead to support previous versions. + + +.. _ext-module-token: + +Module token +............ + +Each module may have an associated *token*: a pointer-sized value intended to +identify the module state's memory layout. +This means that if you have a module object, but you are not sure if it +“belongs” to your extension, you can check using code like this: + +.. 
code-block:: c + + PyObject *module = <the module in question>; + + void *module_token; + if (PyModule_GetToken(module, &module_token) < 0) { + return NULL; + } + if (module_token != your_token) { + PyErr_SetString(PyExc_ValueError, "unexpected module"); + return NULL; + } + + // This module's state has the expected memory layout; it's safe to cast + struct my_state *state = (struct my_state*)PyModule_GetState(module); + +A module's token -- and the *your_token* value to use in the above code -- is: + +- For modules created with :c:type:`PyModuleDef`: the address of that + :c:type:`PyModuleDef`; +- For modules defined with the :c:macro:`Py_mod_token` slot: the value + of that slot; +- For modules created from a ``PyModExport_*`` + :ref:`export hook `: the slots array that the export + hook returned (unless overridden with :c:macro:`Py_mod_token`). + +.. c:macro:: Py_mod_token + + :c:type:`Slot ID <PyModuleDef_Slot.slot>` for the module token. + + If you use this slot to set the module token (rather than rely on the + default), you must ensure that: + + * The pointer outlives the module, so it's not reused for something else + while the module exists. + * It "belongs" to the extension that defines the module, so it will not + clash with other extensions. + * If the token points to a :c:type:`PyModuleDef` struct, the module should + behave as if it was created from that :c:type:`PyModuleDef`. + In particular, the module state must have matching layout and semantics. + + Modules created from :c:type:`PyModuleDef` always use the address of + the :c:type:`PyModuleDef` as the token. + This means that :c:macro:`!Py_mod_token` cannot be used in + :c:member:`PyModuleDef.m_slots`. + + .. versionadded:: next + +.. c:function:: int PyModule_GetToken(PyObject *module, void** result) + + Set *\*result* to the module's token and return 0. + + On error, set *\*result* to NULL, and return -1 with an exception set. + + .. versionadded:: next + +See also :c:func:`PyType_GetModuleByToken`. + + +.. 
_module-from-slots: Creating extension modules dynamically -------------------------------------- -The following functions may be used to create a module outside of an -extension's :ref:`initialization function `. -They are also used in -:ref:`single-phase initialization `. +The following functions may be used to create an extension module dynamically, +rather than from an extension's :ref:`export hook `. + +.. c:function:: PyObject *PyModule_FromSlotsAndSpec(const PyModuleDef_Slot *slots, PyObject *spec) + + Create a new module object, given an array of :ref:`slots ` + and the :py:class:`~importlib.machinery.ModuleSpec` *spec*. + + The *slots* argument must point to an array of :c:type:`PyModuleDef_Slot` + structures, terminated by an entry slot with slot ID of 0 + (typically written as ``{0}`` or ``{0, NULL}`` in C). + The *slots* argument may not be ``NULL``. + + The *spec* argument may be any ``ModuleSpec``-like object, as described + in :c:macro:`Py_mod_create` documentation. + Currently, the *spec* must have a ``name`` attribute. + + On success, return the new module. + On error, return ``NULL`` with an exception set. + + Note that this does not process the module's execution slot + (:c:data:`Py_mod_exec`). + Both :c:func:`!PyModule_FromSlotsAndSpec` and :c:func:`PyModule_Exec` + must be called to fully initialize a module. + (See also :ref:`multi-phase-initialization`.) + + The *slots* array only needs to be valid for the duration of the + :c:func:`!PyModule_FromSlotsAndSpec` call. + In particular, it may be heap-allocated. + + .. versionadded:: next + +.. c:function:: int PyModule_Exec(PyObject *module) + + Execute the :c:data:`Py_mod_exec` slot(s) of the given *module*. + + On success, return 0. + On error, return -1 with an exception set. + + For clarity: If *module* has no slots, for example if it uses + :ref:`legacy single-phase initialization `, + this function does nothing and returns 0. + + .. versionadded:: next + + + +.. 
_pymoduledef: + +Module definition struct +------------------------ + +Traditionally, extension modules were defined using a *module definition* +as the “description” of how a module should be created. +Rather than using an array of :ref:`slots ` directly, +the definition has dedicated members for the most common functionality, +and allows additional slots as an extension mechanism. + +This way of defining modules is still available and there are no plans to +remove it. + +.. c:type:: PyModuleDef + + The module definition struct, which holds information needed to create + a module object. + + This structure must be statically allocated (or be otherwise guaranteed + to be valid while any modules created from it exist). + Usually, there is only one variable of this type for each extension module + defined this way. + + .. c:member:: PyModuleDef_Base m_base + + Always initialize this member to :c:macro:`PyModuleDef_HEAD_INIT`: + + .. c:namespace:: NULL + + .. c:type:: PyModuleDef_Base + + The type of :c:member:`!PyModuleDef.m_base`. + + .. c:macro:: PyModuleDef_HEAD_INIT + + The required initial value for :c:member:`!PyModuleDef.m_base`. + + .. c:member:: const char *m_name + + Corresponds to the :c:macro:`Py_mod_name` slot. + + .. c:member:: const char *m_doc + + Corresponds to the :c:macro:`Py_mod_doc` slot. + Setting this to NULL is equivalent to omitting the slot. + + .. c:member:: Py_ssize_t m_size + + Corresponds to the :c:macro:`Py_mod_state_size` slot. + Setting this to zero is equivalent to omitting the slot. + + When using :ref:`legacy single-phase initialization ` + or when creating modules dynamically using :c:func:`PyModule_Create` + or :c:func:`PyModule_Create2`, :c:member:`!m_size` may be set to -1. + This indicates that the module does not support sub-interpreters, + because it has global state. + + .. c:member:: PyMethodDef *m_methods + + Corresponds to the :c:macro:`Py_mod_methods` slot. 
+ Setting this to NULL is equivalent to omitting the slot. + + .. c:member:: PyModuleDef_Slot* m_slots + + An array of additional slots, terminated by a ``{0, NULL}`` entry. + + This array may not contain slots corresponding to :c:type:`PyModuleDef` + members. + For example, you cannot use :c:macro:`Py_mod_name` in :c:member:`!m_slots`; + the module name must be given as :c:member:`PyModuleDef.m_name`. + + .. versionchanged:: 3.5 + + Prior to version 3.5, this member was always set to ``NULL``, + and was defined as: + + .. c:member:: inquiry m_reload + + .. c:member:: traverseproc m_traverse + inquiry m_clear + freefunc m_free + + These members correspond to the :c:macro:`Py_mod_state_traverse`, + :c:macro:`Py_mod_state_clear`, and :c:macro:`Py_mod_state_free` slots, + respectively. + + Setting these members to NULL is equivalent to omitting the + corresponding slots. + + .. versionchanged:: 3.9 + + :c:member:`m_traverse`, :c:member:`m_clear` and :c:member:`m_free` + functions are no longer called before the module state is allocated. + + +.. _moduledef-dynamic: + +The following API can be used to create modules from a :c:type:`!PyModuleDef` +struct: .. c:function:: PyObject* PyModule_Create(PyModuleDef *def) @@ -510,12 +834,13 @@ They are also used in useful for versioning. This may change in the future. +.. _capi-module-support-functions: + Support functions ----------------- -The following functions are provided to help initialize a module -state. -They are intended for a module's execution slots (:c:data:`Py_mod_exec`), +The following functions are provided to help initialize a module object. +They are intended for a module's execution slot (:c:data:`Py_mod_exec`), the initialization function for legacy :ref:`single-phase initialization `, or code that creates modules dynamically. 
diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index b4e7cb1d77e1a3..62f45def04f746 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -280,6 +280,8 @@ Implementing functions and methods Name of the method. + A ``NULL`` *ml_name* marks the end of a :c:type:`!PyMethodDef` array. + .. c:member:: PyCFunction ml_meth Pointer to the C implementation. diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index c7946e3190f01b..1f57cc04f5dc27 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -283,8 +283,8 @@ Type Objects ``Py_TYPE(self)`` may be a *subclass* of the intended class, and subclasses are not necessarily defined in the same module as their superclass. See :c:type:`PyCMethod` to get the class that defines the method. - See :c:func:`PyType_GetModuleByDef` for cases when :c:type:`!PyCMethod` cannot - be used. + See :c:func:`PyType_GetModuleByToken` for cases when :c:type:`!PyCMethod` + cannot be used. .. versionadded:: 3.9 @@ -304,10 +304,10 @@ Type Objects .. versionadded:: 3.9 -.. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) +.. c:function:: PyObject* PyType_GetModuleByToken(PyTypeObject *type, const void *mod_token) - Find the first superclass whose module was created from - the given :c:type:`PyModuleDef` *def*, and return that module. + Find the first superclass whose module has the given + :ref:`module token `, and return that module. If no module is found, raises a :py:class:`TypeError` and returns ``NULL``. @@ -317,6 +317,23 @@ Type Objects and other places where a method's defining class cannot be passed using the :c:type:`PyCMethod` calling convention. + .. versionadded:: next + + +.. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) + + Find the first superclass whose module was created from the given + :c:type:`PyModuleDef` *def*, or whose :ref:`module token ` + is equal to *def*, and return that module. 
+ + Note that modules created from a :c:type:`PyModuleDef` always have their + token set to the :c:type:`PyModuleDef`'s address. + In other words, this function is equivalent to + :c:func:`PyType_GetModuleByToken`, except that it: + + - returns a borrowed reference, and + - has a non-``void*`` argument type (which is a cosmetic difference in C). + The returned reference is :term:`borrowed ` from *type*, and will be valid as long as you hold a reference to *type*. Do not release it with :c:func:`Py_DECREF` or similar. @@ -324,10 +341,10 @@ Type Objects .. versionadded:: 3.11 -.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) +.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *tp_token, PyTypeObject **result) Find the first superclass in *type*'s :term:`method resolution order` whose - :c:macro:`Py_tp_token` token is equal to the given one. + :c:macro:`Py_tp_token` token is equal to *tp_token*. * If found, set *\*result* to a new :term:`strong reference` to it and return ``1``. @@ -338,7 +355,7 @@ Type Objects The *result* argument may be ``NULL``, in which case *\*result* is not set. Use this if you need only the return value. - The *token* argument may not be ``NULL``. + The *tp_token* argument may not be ``NULL``. .. versionadded:: 3.14 @@ -638,7 +655,7 @@ The following functions and structs are used to create under the :ref:`limited API `. .. versionchanged:: 3.14 - The field :c:member:`~PyTypeObject.tp_vectorcall` can now set + The field :c:member:`~PyTypeObject.tp_vectorcall` can now be set using :c:data:`Py_tp_vectorcall`. See the field's documentation for details. diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index 3b07b5fbed5959..7eb9f0b54abd4e 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -396,3 +396,43 @@ Available start symbols * :pep:`484` .. versionadded:: 3.8 + + +Stack Effects +^^^^^^^^^^^^^ + +.. seealso:: + :py:func:`dis.stack_effect` + + +.. 
c:macro:: PY_INVALID_STACK_EFFECT + + Sentinel value representing an invalid stack effect. + + This is currently equivalent to ``INT_MAX``. + + .. versionadded:: 3.8 + + +.. c:function:: int PyCompile_OpcodeStackEffect(int opcode, int oparg) + + Compute the stack effect of *opcode* with argument *oparg*. + + On success, this function returns the stack effect; on failure, this + returns :c:macro:`PY_INVALID_STACK_EFFECT`. + + .. versionadded:: 3.4 + + +.. c:function:: int PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump) + + Similar to :c:func:`PyCompile_OpcodeStackEffect`, but don't include the + stack effect of jumping if *jump* is zero. + + If *jump* is ``0``, this will not include the stack effect of jumping, but + if *jump* is ``1`` or ``-1``, this will include it. + + On success, this function returns the stack effect; on failure, this + returns :c:macro:`PY_INVALID_STACK_EFFECT`. + + .. versionadded:: 3.8 diff --git a/Doc/conf.py b/Doc/conf.py index 0f1412d1007dc2..a4275835059efa 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -361,7 +361,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). 
-_stdauthor = 'Guido van Rossum and the Python development team' +_stdauthor = 'The Python development team' latex_documents = [ ('c-api/index', 'c-api.tex', 'The Python/C API', _stdauthor, 'manual'), ( diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 48f4f4919e8966..64399f6ab1ff26 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1472,6 +1472,9 @@ PyModule_Create2:PyObject*::+1: PyModule_Create2:PyModuleDef*:def:: PyModule_Create2:int:module_api_version:: +PyModule_Exec:int::: +PyModule_Exec:PyObject*:module:0: + PyModule_ExecDef:int::: PyModule_ExecDef:PyObject*:module:0: PyModule_ExecDef:PyModuleDef*:def:: @@ -1485,6 +1488,10 @@ PyModule_FromDefAndSpec2:PyModuleDef*:def:: PyModule_FromDefAndSpec2:PyObject*:spec:0: PyModule_FromDefAndSpec2:int:module_api_version:: +PyModule_FromSlotsAndSpec:PyObject*::+1: +PyModule_FromSlotsAndSpec:const PyModuleDef_Slot *:slots:: +PyModule_FromSlotsAndSpec:PyObject*:spec:0: + PyModule_GetDef:PyModuleDef*::0: PyModule_GetDef:PyObject*:module:0: @@ -1506,6 +1513,14 @@ PyModule_GetNameObject:PyObject*:module:0: PyModule_GetState:void*::: PyModule_GetState:PyObject*:module:0: +PyModule_GetStateSize:int::: +PyModule_GetStateSize:PyObject*:module:0: +PyModule_GetStateSize:Py_ssize_t*:result:: + +PyModule_GetToken:int::: +PyModule_GetToken:PyObject*:module:0: +PyModule_GetToken:void**:result:: + PyModule_New:PyObject*::+1: PyModule_New:char*:name:: @@ -2412,6 +2427,10 @@ PyType_GetFlags:PyTypeObject*:type:0: PyType_GetName:PyObject*::+1: PyType_GetName:PyTypeObject*:type:0: +PyType_GetModuleByToken:PyObject*::+1: +PyType_GetModuleByToken:PyTypeObject*:type:0: +PyType_GetModuleByToken:const void*:mod_token:: + PyType_GetModuleByDef:PyObject*::0: PyType_GetModuleByDef:PyTypeObject*:type:0: PyType_GetModuleByDef:PyModuleDef*:def:: diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 5cbf3771950fc0..9c5fdcefaf81d0 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -160,6 
+160,7 @@ func,PyDict_Merge,3.2,, func,PyDict_MergeFromSeq2,3.2,, func,PyDict_New,3.2,, func,PyDict_Next,3.2,, +func,PyDict_SetDefaultRef,3.15,, func,PyDict_SetItem,3.2,, func,PyDict_SetItemString,3.2,, func,PyDict_Size,3.2,, @@ -463,6 +464,7 @@ data,PyMethodDescr_Type,3.2,, type,PyModuleDef,3.2,,full-abi type,PyModuleDef_Base,3.2,,full-abi func,PyModuleDef_Init,3.5,, +type,PyModuleDef_Slot,3.5,,full-abi data,PyModuleDef_Type,3.5,, func,PyModule_Add,3.13,, func,PyModule_AddFunctions,3.7,, @@ -914,6 +916,7 @@ func,Py_GetPlatform,3.2,, func,Py_GetRecursionLimit,3.2,, func,Py_GetVersion,3.2,, data,Py_HasFileSystemDefaultEncoding,3.2,, +func,Py_IS_TYPE,3.15,, func,Py_IncRef,3.2,, func,Py_Initialize,3.2,, func,Py_InitializeEx,3.2,, @@ -935,6 +938,8 @@ func,Py_REFCNT,3.14,, macro,Py_RELATIVE_OFFSET,3.12,, func,Py_ReprEnter,3.2,, func,Py_ReprLeave,3.2,, +func,Py_SET_SIZE,3.15,, +func,Py_SIZE,3.15,, func,Py_SetProgramName,3.2,, func,Py_SetPythonHome,3.2,, func,Py_SetRecursionLimit,3.2,, @@ -979,8 +984,12 @@ macro,Py_bf_releasebuffer,3.11,, type,Py_buffer,3.11,,full-abi type,Py_intptr_t,3.2,, macro,Py_mod_abi,3.15,, +macro,Py_mod_create,3.5,, macro,Py_mod_doc,3.15,, +macro,Py_mod_exec,3.5,, +macro,Py_mod_gil,3.13,, macro,Py_mod_methods,3.15,, +macro,Py_mod_multiple_interpreters,3.12,, macro,Py_mod_name,3.15,, macro,Py_mod_state_clear,3.15,, macro,Py_mod_state_free,3.15,, diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index dee92312169a27..f9b65643dfe888 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -426,7 +426,7 @@ A pointer to the module definition must be returned via :c:func:`PyModuleDef_Ini so that the import machinery can create the module and store it in ``sys.modules``. When embedding Python, the :c:func:`!PyInit_spam` function is not called -automatically unless there's an entry in the :c:data:`!PyImport_Inittab` table. +automatically unless there's an entry in the :c:data:`PyImport_Inittab` table. 
To add the module to the initialization table, use :c:func:`PyImport_AppendInittab`, optionally followed by an import of the module:: diff --git a/Doc/howto/a-conceptual-overview-of-asyncio.rst b/Doc/howto/a-conceptual-overview-of-asyncio.rst index af1e39480cc1f6..3adfedbf410ecc 100644 --- a/Doc/howto/a-conceptual-overview-of-asyncio.rst +++ b/Doc/howto/a-conceptual-overview-of-asyncio.rst @@ -1,7 +1,7 @@ .. _a-conceptual-overview-of-asyncio: **************************************** -A Conceptual Overview of :mod:`!asyncio` +A conceptual overview of :mod:`!asyncio` **************************************** This :ref:`HOWTO ` article seeks to help you build a sturdy mental @@ -37,15 +37,15 @@ In part 1, we'll cover the main, high-level building blocks of :mod:`!asyncio`: the event loop, coroutine functions, coroutine objects, tasks, and ``await``. ========== -Event Loop +Event loop ========== Everything in :mod:`!asyncio` happens relative to the event loop. -It's the star of the show. +It's the star of the show, but prefers to work behind the scenes, managing +and coordinating resources. It's like an orchestra conductor. -It's behind the scenes managing resources. Some power is explicitly granted to it, but a lot of its ability to get things -done comes from the respect and cooperation of its worker bees. +done comes from the respect and cooperation of its band members. In more technical terms, the event loop contains a collection of jobs to be run. Some jobs are added directly by you, and some indirectly by :mod:`!asyncio`. @@ -59,7 +59,7 @@ This process repeats indefinitely, with the event loop cycling endlessly onwards. If there are no more jobs pending execution, the event loop is smart enough to rest and avoid needlessly wasting CPU cycles, and will come back when there's -more work to be done. +more work to be done, such as when I/O operations complete or timers expire. 
Effective execution relies on jobs sharing well and cooperating; a greedy job could hog control and leave the other jobs to starve, rendering the overall @@ -170,14 +170,17 @@ Roughly speaking, :ref:`tasks ` are coroutines (not coroutine functions) tied to an event loop. A task also maintains a list of callback functions whose importance will become clear in a moment when we discuss :keyword:`await`. -The recommended way to create tasks is via :func:`asyncio.create_task`. Creating a task automatically schedules it for execution (by adding a callback to run it in the event loop's to-do list, that is, collection of jobs). +The recommended way to create tasks is via :func:`asyncio.create_task`. -Since there's only one event loop (in each thread), :mod:`!asyncio` takes care of -associating the task with the event loop for you. As such, there's no need -to specify the event loop. +:mod:`!asyncio` automatically associates tasks with the event loop for you. +This automatic association was purposely designed into :mod:`!asyncio` for +the sake of simplicity. +Without it, you'd have to keep track of the event loop object and pass it to +any coroutine function that wants to create tasks, adding redundant clutter +to your code. :: @@ -250,6 +253,10 @@ different ways:: In a crucial way, the behavior of ``await`` depends on the type of object being awaited. +^^^^^^^^^^^^^^ +Awaiting tasks +^^^^^^^^^^^^^^ + Awaiting a task will cede control from the current task or coroutine to the event loop. In the process of relinquishing control, a few important things happen. @@ -281,6 +288,10 @@ This is a basic, yet reliable mental model. In practice, the control handoffs are slightly more complex, but not by much. In part 2, we'll walk through the details that make this possible. 
+^^^^^^^^^^^^^^^^^^^ +Awaiting coroutines +^^^^^^^^^^^^^^^^^^^ + **Unlike tasks, awaiting a coroutine does not hand control back to the event loop!** Wrapping a coroutine in a task first, then awaiting that would cede @@ -347,8 +358,10 @@ The design intentionally trades off some conceptual clarity around usage of ``await`` for improved performance. Each time a task is awaited, control needs to be passed all the way up the call stack to the event loop. -That might sound minor, but in a large program with many ``await`` statements and a deep -call stack, that overhead can add up to a meaningful performance drag. +Then, the event loop needs to manage its internal state and work through +its processing logic to resume the next job. +That might sound minor, but in a large program with many ``await``\ s, that +overhead can add up to a non-negligible performance drag. ------------------------------------------------ A conceptual overview part 2: the nuts and bolts @@ -364,7 +377,8 @@ and how to make your own asynchronous operators. The inner workings of coroutines ================================ -:mod:`!asyncio` leverages four components to pass around control. +:mod:`!asyncio` leverages four components of Python to pass +around control. :meth:`coroutine.send(arg) ` is the method used to start or resume a coroutine. @@ -448,9 +462,9 @@ That might sound odd to you. You might be thinking: That causes the error: ``SyntaxError: yield from not allowed in a coroutine.`` This was intentionally designed for the sake of simplicity -- mandating only one way of using coroutines. + Despite that, ``yield from`` and ``await`` effectively do the same thing. Initially ``yield`` was barred as well, but was re-accepted to allow for async generators. - Despite that, ``yield from`` and ``await`` effectively do the same thing. 
======= Futures diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 5647ab2d87c79c..83eba8cfea3969 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -45,9 +45,12 @@ single-phase initialization. Multi-Phase Initialization .......................... -Extensions that use multi-phase initialization (i.e., -:c:func:`PyModuleDef_Init`) should add a :c:data:`Py_mod_gil` slot in the -module definition. If your extension supports older versions of CPython, +Extensions that use :ref:`multi-phase initialization ` +(functions like :c:func:`PyModuleDef_Init`, +:c:func:`PyModExport_* ` export hook, +:c:func:`PyModule_FromSlotsAndSpec`) should add a +:c:data:`Py_mod_gil` slot in the module definition. +If your extension supports older versions of CPython, you should guard the slot with a :c:data:`PY_VERSION_HEX` check. :: @@ -60,18 +63,12 @@ you should guard the slot with a :c:data:`PY_VERSION_HEX` check. {0, NULL} }; - static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_slots = module_slots, - ... - }; - Single-Phase Initialization ........................... -Extensions that use single-phase initialization (i.e., -:c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to +Extensions that use legacy :ref:`single-phase initialization ` +(that is, :c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to indicate that they support running with the GIL disabled. The function is only defined in the free-threaded build, so you should guard the call with ``#ifdef Py_GIL_DISABLED`` to avoid compilation errors in the regular build. diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index 254fe729355353..243cc27bac7025 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -352,6 +352,8 @@ If you don't include such a comment, the default encoding used will be UTF-8 as already mentioned. 
See also :pep:`263` for more information. +.. _unicode-properties: + Unicode Properties ------------------ diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 5a8f0bde2e385d..2a39f248651936 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -1322,8 +1322,12 @@ attribute is determined by the ``dest`` keyword argument of For optional argument actions, the value of ``dest`` is normally inferred from the option strings. :class:`ArgumentParser` generates the value of ``dest`` by -taking the first long option string and stripping away the initial ``--`` -string. If no long option strings were supplied, ``dest`` will be derived from +taking the first double-dash long option string and stripping away the initial +``-`` characters. +If no double-dash long option strings were supplied, ``dest`` will be derived +from the first single-dash long option string by stripping the initial ``-`` +character. +If no long option strings were supplied, ``dest`` will be derived from the first short option string by stripping the initial ``-`` character. Any internal ``-`` characters will be converted to ``_`` characters to make sure the string is a valid attribute name. The examples below illustrate this @@ -1331,11 +1335,12 @@ behavior:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('-f', '--foo-bar', '--foo') + >>> parser.add_argument('-q', '-quz') >>> parser.add_argument('-x', '-y') - >>> parser.parse_args('-f 1 -x 2'.split()) - Namespace(foo_bar='1', x='2') - >>> parser.parse_args('--foo 1 -y 2'.split()) - Namespace(foo_bar='1', x='2') + >>> parser.parse_args('-f 1 -q 2 -x 3'.split()) + Namespace(foo_bar='1', quz='2', x='3') + >>> parser.parse_args('--foo 1 -quz 2 -y 3'.split()) + Namespace(foo_bar='1', quz='2', x='3') ``dest`` allows a custom attribute name to be provided:: @@ -1344,6 +1349,9 @@ behavior:: >>> parser.parse_args('--foo XXX'.split()) Namespace(bar='XXX') +.. 
versionchanged:: next + Single-dash long option now takes precedence over short options. + .. _deprecated: @@ -1437,8 +1445,18 @@ this API may be passed as the ``action`` parameter to >>> parser.parse_args(['--no-foo']) Namespace(foo=False) + Single-dash long options are also supported. + For example, negative option ``-nofoo`` is automatically added for + positive option ``-foo``. + But no additional options are added for short options such as ``-f``. + .. versionadded:: 3.9 + .. versionchanged:: next + Added support for single-dash options. + + Added support for alternate prefix_chars_. + The parse_args() method ----------------------- diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index 0ea3c3c59a660d..2e7d0dbc26e5bc 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -2261,7 +2261,7 @@ and classes for traversing abstract syntax trees: The minimum supported version for ``feature_version`` is now ``(3, 7)``. The ``optimize`` argument was added. - .. versionadded:: next + .. versionadded:: 3.15 Added the *module* parameter. diff --git a/Doc/library/bdb.rst b/Doc/library/bdb.rst index c7a3e0c596b9d0..a3c6da7a6d686b 100644 --- a/Doc/library/bdb.rst +++ b/Doc/library/bdb.rst @@ -236,7 +236,7 @@ The :mod:`bdb` module also defines two classes: Normally derived classes don't override the following methods, but they may if they want to redefine the definition of stopping and breakpoints. - .. method:: is_skipped_line(module_name) + .. method:: is_skipped_module(module_name) Return ``True`` if *module_name* matches any skip pattern. 
diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 9a8108d882e02f..4e0db485e068a8 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1209,7 +1209,7 @@ If a new entry overwrites an existing entry, the original insertion position is changed and moved to the end:: class LastUpdatedOrderedDict(OrderedDict): - 'Store items in the order the keys were last added' + 'Store items in the order that the keys were last updated.' def __setitem__(self, key, value): super().__setitem__(key, value) diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index ba882f10bbe2b8..059377756999a4 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -1575,7 +1575,7 @@ Constants Specification that this implementation complies with. See https://speleotrove.com/decimal/decarith.html for the specification. - .. versionadded:: next + .. versionadded:: 3.15 The following constants are only relevant for the C module. They diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 3257daf89d327b..8314fed80fa512 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -377,7 +377,7 @@ are always available. They are listed here in alphabetical order. ``ast.PyCF_ALLOW_TOP_LEVEL_AWAIT`` can now be passed in flags to enable support for top-level ``await``, ``async for``, and ``async with``. - .. versionadded:: next + .. versionadded:: 3.15 Added the *module* parameter. diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index b2e2e11c0dc414..221c0712c7c96a 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -57,6 +57,10 @@ The :mod:`functools` module defines the following functions: another thread makes an additional call before the initial call has been completed and cached. + Call-once behavior is not guaranteed because locks are not held during the + function call. 
Potentially another call with the same arguments could + occur while the first call is still running. + .. versionadded:: 3.9 @@ -716,7 +720,7 @@ The :mod:`functools` module defines the following functions: .. versionadded:: 3.8 - .. versionchanged:: next + .. versionchanged:: 3.15 Added support of non-:term:`descriptor` callables. diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 2ef5c4b35a25cc..79a8c38626f002 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -108,10 +108,19 @@ The :mod:`gc` module provides the following functions: * ``uncollectable`` is the total number of objects which were found to be uncollectable (and were therefore moved to the :data:`garbage` - list) inside this generation. + list) inside this generation; + + * ``candidates`` is the total number of objects in this generation which were + considered for collection and traversed; + + * ``duration`` is the total time in seconds spent in collections for this + generation. .. versionadded:: 3.4 + .. versionchanged:: next + Add ``duration`` and ``candidates``. + .. function:: set_threshold(threshold0, [threshold1, [threshold2]]) @@ -313,6 +322,12 @@ values but should not rebind them): "uncollectable": When *phase* is "stop", the number of objects that could not be collected and were put in :data:`garbage`. + "candidates": When *phase* is "stop", the total number of objects in this + generation which were considered for collection and traversed. + + "duration": When *phase* is "stop", the time in seconds spent in the + collection. + Applications can add their own callbacks to this list. The primary use cases are: @@ -325,6 +340,9 @@ values but should not rebind them): .. versionadded:: 3.3 + .. versionchanged:: next + Add "duration" and "candidates". 
+ The following constants are provided for use with :func:`set_debug`: diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index 03ba23b6216cbf..3f0a54ac535cd6 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -480,7 +480,7 @@ ABC hierarchy:: .. versionchanged:: 3.5 Made the method static. - .. versionadded:: next + .. versionadded:: 3.15 Added the *fullname* parameter. @@ -1048,7 +1048,7 @@ find and load modules. :meth:`PathFinder.invalidate_caches` invalidates :class:`NamespacePath`, forcing the path value to be recomputed next time it is accessed. - .. versionadded:: next + .. versionadded:: 3.15 .. class:: SourceFileLoader(fullname, path) diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index c00db31a8ec051..5220c559d3d857 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -636,7 +636,7 @@ Retrieving source code .. versionchanged:: 3.5 Documentation strings are now inherited if not overridden. - .. versionchanged:: next + .. versionchanged:: 3.15 Added parameters *inherit_class_doc* and *fallback_to_class_doc*. Documentation strings on :class:`~functools.cached_property` diff --git a/Doc/library/math.integer.rst b/Doc/library/math.integer.rst index 6a9fe74c5e861b..0068ae2bdd5d07 100644 --- a/Doc/library/math.integer.rst +++ b/Doc/library/math.integer.rst @@ -4,7 +4,7 @@ .. module:: math.integer :synopsis: Integer-specific mathematics functions. -.. versionadded:: next +.. versionadded:: 3.15 -------------- diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 54c98346b2798b..d2ff74822f97ea 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -506,7 +506,7 @@ Summation and product functions Roughly equivalent to:: - sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q))) + sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q, strict=True))) .. 
versionadded:: 3.8 @@ -781,7 +781,7 @@ the following functions from the :mod:`math.integer` module: Floats with integral values (like ``5.0``) are no longer accepted in the :func:`factorial` function. -.. deprecated:: next +.. deprecated:: 3.15 These aliases are :term:`soft deprecated` in favor of the :mod:`math.integer` functions. diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 7dc6c177268ec2..671270d6112212 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -3404,7 +3404,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. class:: statx_result @@ -3661,7 +3661,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: STATX_TYPE @@ -3690,7 +3690,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_STATX_FORCE_SYNC @@ -3700,7 +3700,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_STATX_DONT_SYNC @@ -3709,7 +3709,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_STATX_SYNC_AS_STAT @@ -3721,7 +3721,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_NO_AUTOMOUNT @@ -3733,7 +3733,7 @@ features: .. availability:: Linux. - .. versionadded:: next + .. versionadded:: 3.15 .. function:: statvfs(path) diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst index 5bf36b13c6d789..03ad50b2c5eaf8 100644 --- a/Doc/library/profile.rst +++ b/Doc/library/profile.rst @@ -347,81 +347,6 @@ The statistical profiler produces output similar to deterministic profilers but .. _profile-cli: -:mod:`!profiling.sampling` Module Reference -======================================================= - -.. 
module:: profiling.sampling - :synopsis: Python statistical profiler. - -This section documents the programmatic interface for the :mod:`!profiling.sampling` module. -For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information -about statistical profiling, see :ref:`statistical-profiling` - -.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False, native=False, gc=True) - - Sample a Python process and generate profiling data. - - This is the main entry point for statistical profiling. It creates a - :class:`SampleProfiler`, collects stack traces from the target process, and - outputs the results in the specified format. - - :param int pid: Process ID of the target Python process - :param int sort: Sort order for pstats output (default: 2 for cumulative time) - :param int sample_interval_usec: Sampling interval in microseconds (default: 100) - :param int duration_sec: Duration to sample in seconds (default: 10) - :param str filename: Output filename (None for stdout/default naming) - :param bool all_threads: Whether to sample all threads (default: False) - :param int limit: Maximum number of functions to display (default: None) - :param bool show_summary: Whether to show summary statistics (default: True) - :param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats') - :param bool realtime_stats: Whether to display real-time statistics (default: False) - :param bool native: Whether to include ```` frames (default: False) - :param bool gc: Whether to include ```` frames (default: True) - - :raises ValueError: If output_format is not 'pstats' or 'collapsed' - - Examples:: - - # Basic usage - profile process 1234 for 10 seconds - import profiling.sampling - profiling.sampling.sample(1234) - - # Profile with custom settings - profiling.sampling.sample(1234, duration_sec=30, 
sample_interval_usec=50, all_threads=True) - - # Generate collapsed stack traces for flamegraph.pl - profiling.sampling.sample(1234, output_format='collapsed', filename='profile.collapsed') - -.. class:: SampleProfiler(pid, sample_interval_usec, all_threads) - - Low-level API for the statistical profiler. - - This profiler uses periodic stack sampling to collect performance data - from running Python processes with minimal overhead. It can attach to - any Python process by PID and collect stack traces at regular intervals. - - :param int pid: Process ID of the target Python process - :param int sample_interval_usec: Sampling interval in microseconds - :param bool all_threads: Whether to sample all threads or just the main thread - - .. method:: sample(collector, duration_sec=10) - - Sample the target process for the specified duration. - - Collects stack traces from the target process at regular intervals - and passes them to the provided collector for processing. - - :param collector: Object that implements ``collect()`` method to process stack traces - :param int duration_sec: Duration to sample in seconds (default: 10) - - The method tracks sampling statistics and can display real-time - information if realtime_stats is enabled. - -.. seealso:: - - :ref:`sampling-profiler-cli` - Command-line interface documentation for the statistical profiler. - Deterministic Profiler Command Line Interface ============================================= diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 89bca9b5b20df7..976136885eac1b 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -482,6 +482,9 @@ The AF_* and SOCK_* constants are now :class:`AddressFamily` and .. versionchanged:: 3.14 Added support for ``TCP_QUICKACK`` on Windows platforms when available. + .. versionchanged:: next + ``IPV6_HDRINCL`` was added. + .. 
data:: AF_CAN PF_CAN diff --git a/Doc/library/stat.rst b/Doc/library/stat.rst index 1cbec3ab847c5f..82012b31a00f20 100644 --- a/Doc/library/stat.rst +++ b/Doc/library/stat.rst @@ -511,4 +511,4 @@ meaning of these constants. STATX_ATTR_DAX STATX_ATTR_WRITE_ATOMIC - .. versionadded:: next + .. versionadded:: 3.15 diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index c539345e598777..a87898dadf4af6 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1994,10 +1994,16 @@ expression support in the :mod:`re` module). ``{}``. Each replacement field contains either the numeric index of a positional argument, or the name of a keyword argument. Returns a copy of the string where each replacement field is replaced with the string value of - the corresponding argument. + the corresponding argument. For example: + + .. doctest:: >>> "The sum of 1 + 2 is {0}".format(1+2) 'The sum of 1 + 2 is 3' + >>> "The sum of {a} + {b} is {answer}".format(answer=1+2, a=1, b=2) + 'The sum of 1 + 2 is 3' + >>> "{1} expects the {0} Inquisition!".format("Spanish", "Nobody") + 'Nobody expects the Spanish Inquisition!' See :ref:`formatstrings` for a description of the various formatting options that can be specified in format strings. @@ -2057,13 +2063,32 @@ expression support in the :mod:`re` module). from the `Alphabetic property defined in the section 4.10 'Letters, Alphabetic, and Ideographic' of the Unicode Standard `__. + For example: + + .. doctest:: + + >>> 'Letters and spaces'.isalpha() + False + >>> 'LettersOnly'.isalpha() + True + >>> 'µ'.isalpha() # non-ASCII characters can be considered alphabetical too + True + + See :ref:`unicode-properties`. .. method:: str.isascii() Return ``True`` if the string is empty or all characters in the string are ASCII, ``False`` otherwise. - ASCII characters have code points in the range U+0000-U+007F. + ASCII characters have code points in the range U+0000-U+007F. For example: + + .. 
doctest:: + + >>> 'ASCII characters'.isascii() + True + >>> 'µ'.isascii() + False .. versionadded:: 3.7 @@ -2073,9 +2098,18 @@ expression support in the :mod:`re` module). Return ``True`` if all characters in the string are decimal characters and there is at least one character, ``False`` otherwise. Decimal characters are those that can be used to form - numbers in base 10, e.g. U+0660, ARABIC-INDIC DIGIT + numbers in base 10, such as U+0660, ARABIC-INDIC DIGIT ZERO. Formally a decimal character is a character in the Unicode - General Category "Nd". + General Category "Nd". For example: + + .. doctest:: + + >>> '0123456789'.isdecimal() + True + >>> '٠١٢٣٤٥٦٧٨٩'.isdecimal() # Arabic-Indic digits zero to nine + True + >>> 'alphabetic'.isdecimal() + False .. method:: str.isdigit() @@ -2194,7 +2228,16 @@ expression support in the :mod:`re` module). Return a string which is the concatenation of the strings in *iterable*. A :exc:`TypeError` will be raised if there are any non-string values in *iterable*, including :class:`bytes` objects. The separator between - elements is the string providing this method. + elements is the string providing this method. For example: + + .. doctest:: + + >>> ', '.join(['spam', 'spam', 'spam']) + 'spam, spam, spam' + >>> '-'.join('Python') + 'P-y-t-h-o-n' + + See also :meth:`split`. .. method:: str.ljust(width, fillchar=' ', /) @@ -2408,6 +2451,8 @@ expression support in the :mod:`re` module). >>> " foo ".split(maxsplit=0) ['foo '] + See also :meth:`join`. + .. index:: single: universal newlines; str.splitlines method @@ -3191,7 +3236,7 @@ objects. Taking all bytes is a zero-copy operation. - .. versionadded:: next + .. versionadded:: 3.15 See the :ref:`What's New ` entry for common code patterns which can be optimized with @@ -4604,7 +4649,7 @@ copying. .. versionadded:: 3.14 - .. method:: index(value, start=0, stop=sys.maxsize, /) + .. 
method:: index(value, start=0, stop=sys.maxsize, /) Return the index of the first occurrence of *value* (at or after index *start* and before index *stop*). @@ -4755,11 +4800,12 @@ other sequence-like behavior. There are currently two built-in set types, :class:`set` and :class:`frozenset`. The :class:`set` type is mutable --- the contents can be changed using methods -like :meth:`~set.add` and :meth:`~set.remove`. Since it is mutable, it has no -hash value and cannot be used as either a dictionary key or as an element of -another set. The :class:`frozenset` type is immutable and :term:`hashable` --- -its contents cannot be altered after it is created; it can therefore be used as -a dictionary key or as an element of another set. +like :meth:`add ` and :meth:`remove `. +Since it is mutable, it has no hash value and cannot be used as +either a dictionary key or as an element of another set. +The :class:`frozenset` type is immutable and :term:`hashable` --- +its contents cannot be altered after it is created; +it can therefore be used as a dictionary key or as an element of another set. Non-empty sets (not frozensets) can be created by placing a comma-separated list of elements within braces, for example: ``{'jack', 'sjoerd'}``, in addition to the diff --git a/Doc/library/symtable.rst b/Doc/library/symtable.rst index c0d9e79197de7c..f5e6f9f8acfdb8 100644 --- a/Doc/library/symtable.rst +++ b/Doc/library/symtable.rst @@ -30,7 +30,7 @@ Generating Symbol Tables It is needed to unambiguous :ref:`filter ` syntax warnings by module name. - .. versionadded:: next + .. versionadded:: 3.15 Added the *module* parameter. diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst index fd5f56bd7eaaeb..34f21f49b4bcb1 100644 --- a/Doc/library/unicodedata.rst +++ b/Doc/library/unicodedata.rst @@ -156,7 +156,7 @@ following functions: >>> unicodedata.isxidstart('0') False - .. versionadded:: next + .. versionadded:: 3.15 .. 
function:: isxidcontinue(chr, /) @@ -171,7 +171,7 @@ following functions: >>> unicodedata.isxidcontinue(' ') False - .. versionadded:: next + .. versionadded:: 3.15 .. function:: decomposition(chr, /) diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index fe45becce2e5c3..0bc0a953fd921c 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -438,7 +438,7 @@ run whether the test method succeeded or not. Such a working environment for the testing code is called a :dfn:`test fixture`. A new TestCase instance is created as a unique test fixture used to execute each individual test method. Thus -:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`~TestCase.__init__` +:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`!TestCase.__init__` will be called once per test. It is recommended that you use TestCase implementations to group tests together @@ -518,7 +518,7 @@ set-up and tear-down methods:: subclasses will make future test refactorings infinitely easier. In some cases, the existing tests may have been written using the :mod:`doctest` -module. If so, :mod:`doctest` provides a :class:`DocTestSuite` class that can +module. If so, :mod:`doctest` provides a :class:`~doctest.DocTestSuite` class that can automatically build :class:`unittest.TestSuite` instances from the existing :mod:`doctest`\ -based tests. @@ -1023,7 +1023,7 @@ Test cases additional keyword argument *msg*. The context manager will store the caught exception object in its - :attr:`exception` attribute. This can be useful if the intention + :attr:`!exception` attribute. This can be useful if the intention is to perform additional checks on the exception raised:: with self.assertRaises(SomeException) as cm: @@ -1036,7 +1036,7 @@ Test cases Added the ability to use :meth:`assertRaises` as a context manager. .. versionchanged:: 3.2 - Added the :attr:`exception` attribute. + Added the :attr:`!exception` attribute. .. 
versionchanged:: 3.3 Added the *msg* keyword argument when used as a context manager. @@ -1089,8 +1089,8 @@ Test cases additional keyword argument *msg*. The context manager will store the caught warning object in its - :attr:`warning` attribute, and the source line which triggered the - warnings in the :attr:`filename` and :attr:`lineno` attributes. + :attr:`!warning` attribute, and the source line which triggered the + warnings in the :attr:`!filename` and :attr:`!lineno` attributes. This can be useful if the intention is to perform additional checks on the warning caught:: @@ -1437,7 +1437,7 @@ Test cases that lists the differences between the sets. This method is used by default when comparing sets or frozensets with :meth:`assertEqual`. - Fails if either of *first* or *second* does not have a :meth:`set.difference` + Fails if either of *first* or *second* does not have a :meth:`~frozenset.difference` method. .. versionadded:: 3.1 @@ -1645,7 +1645,7 @@ Test cases .. method:: asyncSetUp() :async: - Method called to prepare the test fixture. This is called after :meth:`setUp`. + Method called to prepare the test fixture. This is called after :meth:`TestCase.setUp`. This is called immediately before calling the test method; other than :exc:`AssertionError` or :exc:`SkipTest`, any exception raised by this method will be considered an error rather than a test failure. The default implementation @@ -1655,7 +1655,7 @@ Test cases :async: Method called immediately after the test method has been called and the - result recorded. This is called before :meth:`tearDown`. This is called even if + result recorded. This is called before :meth:`~TestCase.tearDown`. This is called even if the test method raised an exception, so the implementation in subclasses may need to be particularly careful about checking internal state. 
Any exception, other than :exc:`AssertionError` or :exc:`SkipTest`, raised by this method will be @@ -1684,7 +1684,7 @@ Test cases Sets up a new event loop to run the test, collecting the result into the :class:`TestResult` object passed as *result*. If *result* is omitted or ``None``, a temporary result object is created (by calling - the :meth:`defaultTestResult` method) and used. The result object is + the :meth:`~TestCase.defaultTestResult` method) and used. The result object is returned to :meth:`run`'s caller. At the end of the test all the tasks in the event loop are cancelled. @@ -1805,7 +1805,7 @@ Grouping tests returned by repeated iterations before :meth:`TestSuite.run` must be the same for each call iteration. After :meth:`TestSuite.run`, callers should not rely on the tests returned by this method unless the caller uses a - subclass that overrides :meth:`TestSuite._removeTestAtIndex` to preserve + subclass that overrides :meth:`!TestSuite._removeTestAtIndex` to preserve test references. .. versionchanged:: 3.2 @@ -1816,10 +1816,10 @@ Grouping tests .. versionchanged:: 3.4 In earlier versions the :class:`TestSuite` held references to each :class:`TestCase` after :meth:`TestSuite.run`. Subclasses can restore - that behavior by overriding :meth:`TestSuite._removeTestAtIndex`. + that behavior by overriding :meth:`!TestSuite._removeTestAtIndex`. In the typical usage of a :class:`TestSuite` object, the :meth:`run` method - is invoked by a :class:`TestRunner` rather than by the end-user test harness. + is invoked by a :class:`!TestRunner` rather than by the end-user test harness. Loading and running tests @@ -1853,12 +1853,12 @@ Loading and running tests .. method:: loadTestsFromTestCase(testCaseClass) Return a suite of all test cases contained in the :class:`TestCase`\ -derived - :class:`testCaseClass`. + :class:`!testCaseClass`. A test case instance is created for each method named by :meth:`getTestCaseNames`. 
By default these are the method names beginning with ``test``. If :meth:`getTestCaseNames` returns no - methods, but the :meth:`runTest` method is implemented, a single test + methods, but the :meth:`!runTest` method is implemented, a single test case is created for that method instead. @@ -1905,13 +1905,13 @@ Loading and running tests case class will be picked up as "a test method within a test case class", rather than "a callable object". - For example, if you have a module :mod:`SampleTests` containing a - :class:`TestCase`\ -derived class :class:`SampleTestCase` with three test - methods (:meth:`test_one`, :meth:`test_two`, and :meth:`test_three`), the + For example, if you have a module :mod:`!SampleTests` containing a + :class:`TestCase`\ -derived class :class:`!SampleTestCase` with three test + methods (:meth:`!test_one`, :meth:`!test_two`, and :meth:`!test_three`), the specifier ``'SampleTests.SampleTestCase'`` would cause this method to return a suite which will run all three test methods. Using the specifier ``'SampleTests.SampleTestCase.test_two'`` would cause it to return a test - suite which will run only the :meth:`test_two` test method. The specifier + suite which will run only the :meth:`!test_two` test method. The specifier can refer to modules and packages which have not been imported; they will be imported as a side-effect. @@ -2058,7 +2058,7 @@ Loading and running tests Testing frameworks built on top of :mod:`unittest` may want access to the :class:`TestResult` object generated by running a set of tests for reporting purposes; a :class:`TestResult` instance is returned by the - :meth:`TestRunner.run` method for this purpose. + :meth:`!TestRunner.run` method for this purpose. 
:class:`TestResult` instances have the following attributes that will be of interest when inspecting the results of running a set of tests: @@ -2144,12 +2144,12 @@ Loading and running tests This method can be called to signal that the set of tests being run should be aborted by setting the :attr:`shouldStop` attribute to ``True``. - :class:`TestRunner` objects should respect this flag and return without + :class:`!TestRunner` objects should respect this flag and return without running any additional tests. For example, this feature is used by the :class:`TextTestRunner` class to stop the test framework when the user signals an interrupt from the - keyboard. Interactive tools which provide :class:`TestRunner` + keyboard. Interactive tools which provide :class:`!TestRunner` implementations can use this in a similar manner. The following methods of the :class:`TestResult` class are used to maintain @@ -2469,9 +2469,9 @@ Class and Module Fixtures ------------------------- Class and module level fixtures are implemented in :class:`TestSuite`. When -the test suite encounters a test from a new class then :meth:`tearDownClass` -from the previous class (if there is one) is called, followed by -:meth:`setUpClass` from the new class. +the test suite encounters a test from a new class then +:meth:`~TestCase.tearDownClass` from the previous class (if there is one) +is called, followed by :meth:`~TestCase.setUpClass` from the new class. Similarly if a test is from a different module from the previous test then ``tearDownModule`` from the previous module is run, followed by diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst index 2f3cf6008f58e2..0de7a90bfcb60e 100644 --- a/Doc/library/warnings.rst +++ b/Doc/library/warnings.rst @@ -513,7 +513,7 @@ Available Functions .. versionchanged:: 3.6 Add the *source* parameter. - .. versionchanged:: next + .. 
versionchanged:: 3.15 If no module is passed, test the filter regular expression against module names created from the path, not only the path itself. diff --git a/Doc/library/winreg.rst b/Doc/library/winreg.rst index b150c53735d634..89def6e2afe088 100644 --- a/Doc/library/winreg.rst +++ b/Doc/library/winreg.rst @@ -818,6 +818,6 @@ integer handle, and also disconnect the Windows handle from the handle object. will automatically close *key* when control leaves the :keyword:`with` block. -.. versionchanged:: next +.. versionchanged:: 3.15 Handle objects are now compared by their underlying Windows handle value instead of object identity for equality comparisons. diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 8bceeecd46393e..a21cfaa4645419 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -74,7 +74,7 @@ given point) or to make use of the :func:`DOMEventStream.expandNode` method and switch to DOM-related processing. -.. class:: PullDom(documentFactory=None) +.. class:: PullDOM(documentFactory=None) Subclass of :class:`xml.sax.handler.ContentHandler`. diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index cbbc87b4721a9f..e59759683a6d4c 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -656,7 +656,7 @@ Functions .. versionchanged:: 3.13 Added the :meth:`!close` method. - .. versionchanged:: next + .. versionchanged:: 3.15 A :exc:`ResourceWarning` is now emitted if the iterator opened a file and is not explicitly closed. diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 882b05e87319fa..ebadbc215a0eed 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -2630,8 +2630,8 @@ Notes on using *__slots__*: descriptor directly from the base class). This renders the meaning of the program undefined. In the future, a check may be added to prevent this. 
-* :exc:`TypeError` will be raised if nonempty *__slots__* are defined for a - class derived from a +* :exc:`TypeError` will be raised if *__slots__* other than *__dict__* and + *__weakref__* are defined for a class derived from a :c:member:`"variable-length" built-in type ` such as :class:`int`, :class:`bytes`, and :class:`tuple`. @@ -2656,6 +2656,10 @@ Notes on using *__slots__*: of the iterator's values. However, the *__slots__* attribute will be an empty iterator. +.. versionchanged:: next + Allowed defining the *__dict__* and *__weakref__* *__slots__* for any class. + + .. _class-customization: Customizing class creation diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 04e8e5580fcd79..c41c70f0ed3306 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -6,7 +6,6 @@ Doc/c-api/descriptor.rst Doc/c-api/float.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst -Doc/c-api/module.rst Doc/c-api/stable.rst Doc/c-api/typeobj.rst Doc/library/ast.rst @@ -29,14 +28,12 @@ Doc/library/pyexpat.rst Doc/library/select.rst Doc/library/socket.rst Doc/library/ssl.rst -Doc/library/stdtypes.rst Doc/library/termios.rst Doc/library/test.rst Doc/library/tkinter.rst Doc/library/tkinter.scrolledtext.rst Doc/library/tkinter.ttk.rst Doc/library/unittest.mock.rst -Doc/library/unittest.rst Doc/library/urllib.parse.rst Doc/library/urllib.request.rst Doc/library/wsgiref.rst diff --git a/Doc/tools/templates/dummy.html b/Doc/tools/templates/dummy.html index 0fdbe2a58017ff..75f6607d8f3698 100644 --- a/Doc/tools/templates/dummy.html +++ b/Doc/tools/templates/dummy.html @@ -27,8 +27,8 @@ In extensions/changes.py: -{% trans %}Deprecated since version {deprecated}, will be removed in version {removed}{% endtrans %} -{% trans %}Deprecated since version {deprecated}, removed in version {removed}{% endtrans %} +{% trans %}Deprecated since version %s, will be removed in version %s{% endtrans %} +{% trans %}Deprecated since version %s, removed in version %s{% endtrans %} In 
docsbuild-scripts, when rewriting indexsidebar.html with actual versions: diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst index 3430ac8668e280..e195d9d462dda9 100644 --- a/Doc/whatsnew/2.5.rst +++ b/Doc/whatsnew/2.5.rst @@ -2169,9 +2169,9 @@ Changes to Python's build process and to the C API include: * Two new macros can be used to indicate C functions that are local to the current file so that a faster calling convention can be used. - ``Py_LOCAL(type)`` declares the function as returning a value of the + :c:macro:`Py_LOCAL` declares the function as returning a value of the specified *type* and uses a fast-calling qualifier. - ``Py_LOCAL_INLINE(type)`` does the same thing and also requests the + :c:macro:`Py_LOCAL_INLINE` does the same thing and also requests the function be inlined. If macro :c:macro:`!PY_LOCAL_AGGRESSIVE` is defined before :file:`python.h` is included, a set of more aggressive optimizations are enabled for the module; you should benchmark the results to find out if these diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 5a98297d3f8847..f479a3fb218aeb 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -394,6 +394,10 @@ Other language changes syntax warnings by module name. (Contributed by Serhiy Storchaka in :gh:`135801`.) +* Allowed defining the *__dict__* and *__weakref__* :ref:`__slots__ ` + for any class. + (Contributed by Serhiy Storchaka in :gh:`41779`.) + New modules =========== @@ -412,6 +416,10 @@ Improved modules argparse -------- +* The :class:`~argparse.BooleanOptionalAction` action now supports single-dash + long options and alternate prefix characters. + (Contributed by Serhiy Storchaka in :gh:`138525`.) + * Changed the *suggest_on_error* parameter of :class:`argparse.ArgumentParser` to default to ``True``. This enables suggestions for mistyped arguments by default. (Contributed by Jakob Schluse in :gh:`140450`.) 
@@ -534,6 +542,14 @@ http.cookies (Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.) +idlelib +------- + +* Add a "Reload from Disk" item to the File menu. This allows discarding + unsaved changes and reloading the current version of the file from the disk. + (Contributed by Shamil Abdulaev in :gh:`44968`.) + + inspect ------- @@ -1271,3 +1287,10 @@ that may require changes to your code. Use its :meth:`!close` method or the :func:`contextlib.closing` context manager to close it. (Contributed by Osama Abdelkader and Serhiy Storchaka in :gh:`140601`.) + +* If a short option and a single-dash long option are passed to + :meth:`argparse.ArgumentParser.add_argument`, *dest* is now inferred from + the single-dash long option. For example, in ``add_argument('-f', '-foo')``, + *dest* is now ``'foo'`` instead of ``'f'``. + Pass an explicit *dest* argument to preserve the old behavior. + (Contributed by Serhiy Storchaka in :gh:`138697`.) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index df9ec7050fca1a..5f2f7b6d4f56bd 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -39,16 +39,6 @@ Py_DEPRECATED(3.14) PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObje PyAPI_FUNC(PyObject *) PyDict_SetDefault( PyObject *mp, PyObject *key, PyObject *defaultobj); -// Inserts `key` with a value `default_value`, if `key` is not already present -// in the dictionary. If `result` is not NULL, then the value associated -// with `key` is returned in `*result` (either the existing value, or the now -// inserted `default_value`). -// Returns: -// -1 on error -// 0 if `key` was not present and `default_value` was inserted -// 1 if `key` was present and `default_value` was not inserted -PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); - /* Get the number of items of a dictionary. 
*/ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { PyDictObject *mp; diff --git a/Include/dictobject.h b/Include/dictobject.h index 1bbeec1ab699e7..0384e3131dcdb5 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -68,6 +68,18 @@ PyAPI_FUNC(int) PyDict_GetItemRef(PyObject *mp, PyObject *key, PyObject **result PyAPI_FUNC(int) PyDict_GetItemStringRef(PyObject *mp, const char *key, PyObject **result); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030F0000 +// Inserts `key` with a value `default_value`, if `key` is not already present +// in the dictionary. If `result` is not NULL, then the value associated +// with `key` is returned in `*result` (either the existing value, or the now +// inserted `default_value`). +// Returns: +// -1 on error +// 0 if `key` was not present and `default_value` was inserted +// 1 if `key` was present and `default_value` was not inserted +PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); +#endif + #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *); #endif diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 71066f1bd9f19b..7f60eb495080ae 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -22,33 +22,48 @@ extern "C" { Another use is for the Tier 2 optimizer to decide when to create a new Tier 2 trace (executor). Again, exponential backoff is used. - The 16-bit counter is structured as a 12-bit unsigned 'value' - and a 4-bit 'backoff' field. When resetting the counter, the + The 16-bit counter is structured as a 13-bit unsigned 'value' + and a 3-bit 'backoff' field. When resetting the counter, the backoff field is incremented (until it reaches a limit) and the - value is set to a bit mask representing the value 2**backoff - 1. - The maximum backoff is 12 (the number of bits in the value). 
+ value is set to a bit mask representing some prime value - 1. + New values and backoffs for each backoff are calculated once + at compile time and saved to value_and_backoff_next table. + The maximum backoff is 6, since 7 is an UNREACHABLE_BACKOFF. There is an exceptional value which must not be updated, 0xFFFF. */ -#define BACKOFF_BITS 4 -#define MAX_BACKOFF 12 -#define UNREACHABLE_BACKOFF 15 - -static inline bool -is_unreachable_backoff_counter(_Py_BackoffCounter counter) -{ - return counter.value_and_backoff == UNREACHABLE_BACKOFF; -} +#define BACKOFF_BITS 3 +#define BACKOFF_MASK 7 +#define MAX_BACKOFF 6 +#define UNREACHABLE_BACKOFF 7 +#define MAX_VALUE 0x1FFF + +#define MAKE_VALUE_AND_BACKOFF(value, backoff) \ + ((value << BACKOFF_BITS) | backoff) + +// For previous backoff b we use value x such that +// x + 1 is near to 2**(2*b+1) and x + 1 is prime. +static const uint16_t value_and_backoff_next[] = { + MAKE_VALUE_AND_BACKOFF(1, 1), + MAKE_VALUE_AND_BACKOFF(6, 2), + MAKE_VALUE_AND_BACKOFF(30, 3), + MAKE_VALUE_AND_BACKOFF(126, 4), + MAKE_VALUE_AND_BACKOFF(508, 5), + MAKE_VALUE_AND_BACKOFF(2052, 6), + // We use the same backoff counter for all backoffs >= MAX_BACKOFF. 
+ MAKE_VALUE_AND_BACKOFF(8190, 6), + MAKE_VALUE_AND_BACKOFF(8190, 6), +}; static inline _Py_BackoffCounter make_backoff_counter(uint16_t value, uint16_t backoff) { - assert(backoff <= 15); - assert(value <= 0xFFF); - _Py_BackoffCounter result; - result.value_and_backoff = (value << BACKOFF_BITS) | backoff; - return result; + assert(backoff <= UNREACHABLE_BACKOFF); + assert(value <= MAX_VALUE); + return ((_Py_BackoffCounter){ + .value_and_backoff = MAKE_VALUE_AND_BACKOFF(value, backoff) + }); } static inline _Py_BackoffCounter @@ -62,14 +77,11 @@ forge_backoff_counter(uint16_t counter) static inline _Py_BackoffCounter restart_backoff_counter(_Py_BackoffCounter counter) { - assert(!is_unreachable_backoff_counter(counter)); - int backoff = counter.value_and_backoff & 15; - if (backoff < MAX_BACKOFF) { - return make_backoff_counter((1 << (backoff + 1)) - 1, backoff + 1); - } - else { - return make_backoff_counter((1 << MAX_BACKOFF) - 1, MAX_BACKOFF); - } + uint16_t backoff = counter.value_and_backoff & BACKOFF_MASK; + assert(backoff <= MAX_BACKOFF); + return ((_Py_BackoffCounter){ + .value_and_backoff = value_and_backoff_next[backoff] + }); } static inline _Py_BackoffCounter @@ -113,7 +125,7 @@ trigger_backoff_counter(void) // as we always end up tracing the loop iteration's // exhaustion iteration. Which aborts our current tracer. #define JUMP_BACKWARD_INITIAL_VALUE 4000 -#define JUMP_BACKWARD_INITIAL_BACKOFF 12 +#define JUMP_BACKWARD_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_jump_backoff_counter(void) { @@ -126,7 +138,7 @@ initial_jump_backoff_counter(void) * otherwise when a side exit warms up we may construct * a new trace before the Tier 1 code has properly re-specialized. 
*/ #define SIDE_EXIT_INITIAL_VALUE 4000 -#define SIDE_EXIT_INITIAL_BACKOFF 12 +#define SIDE_EXIT_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 47c42fccdc2376..2ae84be7b33966 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -217,10 +217,13 @@ extern void _PyEval_DeactivateOpCache(void); static inline int _Py_MakeRecCheck(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + // Overflow if stack pointer is between soft limit and the base of the hardware stack. + // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing. + // We could have the wrong stack limits because of limited platform support, or user-space threads. #if _Py_STACK_GROWS_DOWN - return here_addr < _tstate->c_stack_soft_limit; + return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; #else - return here_addr > _tstate->c_stack_soft_limit; + return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; #endif } diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 2601de40737e85..60b6fc4a72e88f 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -32,7 +32,7 @@ extern "C" { const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \ PyCriticalSection _cs; \ if (_should_lock_cs) { \ - _PyCriticalSection_Begin(&_cs, _orig_seq); \ + PyCriticalSection_Begin(&_cs, _orig_seq); \ } # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \ @@ -77,10 +77,10 @@ _PyCriticalSection_Resume(PyThreadState *tstate); // (private) slow path for locking the mutex PyAPI_FUNC(void) 
-_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m); +_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m); PyAPI_FUNC(void) -_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked); PyAPI_FUNC(void) @@ -95,34 +95,30 @@ _PyCriticalSection_IsActive(uintptr_t tag) } static inline void -_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginMutex(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m) { if (PyMutex_LockFast(m)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_mutex = m; c->_cs_prev = tstate->critical_section; tstate->critical_section = (uintptr_t)c; } else { - _PyCriticalSection_BeginSlow(c, m); + _PyCriticalSection_BeginSlow(tstate, c, m); } } -#define PyCriticalSection_BeginMutex _PyCriticalSection_BeginMutex static inline void -_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) +_PyCriticalSection_Begin(PyThreadState *tstate, PyCriticalSection *c, PyObject *op) { - _PyCriticalSection_BeginMutex(c, &op->ob_mutex); + _PyCriticalSection_BeginMutex(tstate, c, &op->ob_mutex); } -#define PyCriticalSection_Begin _PyCriticalSection_Begin // Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. 
static inline void -_PyCriticalSection_Pop(PyCriticalSection *c) +_PyCriticalSection_Pop(PyThreadState *tstate, PyCriticalSection *c) { - PyThreadState *tstate = _PyThreadState_GET(); uintptr_t prev = c->_cs_prev; tstate->critical_section = prev; @@ -132,7 +128,7 @@ _PyCriticalSection_Pop(PyCriticalSection *c) } static inline void -_PyCriticalSection_End(PyCriticalSection *c) +_PyCriticalSection_End(PyThreadState *tstate, PyCriticalSection *c) { // If the mutex is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for locks already held in the top-most @@ -141,18 +137,17 @@ _PyCriticalSection_End(PyCriticalSection *c) return; } PyMutex_Unlock(c->_cs_mutex); - _PyCriticalSection_Pop(c); + _PyCriticalSection_Pop(tstate, c); } -#define PyCriticalSection_End _PyCriticalSection_End static inline void -_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +_PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { if (m1 == m2) { // If the two mutex arguments are the same, treat this as a critical // section with a single mutex. 
c->_cs_mutex2 = NULL; - _PyCriticalSection_BeginMutex(&c->_cs_base, m1); + _PyCriticalSection_BeginMutex(tstate, &c->_cs_base, m1); return; } @@ -167,7 +162,6 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if (PyMutex_LockFast(m1)) { if (PyMutex_LockFast(m2)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_base._cs_mutex = m1; c->_cs_mutex2 = m2; c->_cs_base._cs_prev = tstate->critical_section; @@ -176,24 +170,22 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) tstate->critical_section = p; } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 1); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 1); } } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 0); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 0); } } -#define PyCriticalSection2_BeginMutex _PyCriticalSection2_BeginMutex static inline void -_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) +_PyCriticalSection2_Begin(PyThreadState *tstate, PyCriticalSection2 *c, PyObject *a, PyObject *b) { - _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex); + _PyCriticalSection2_BeginMutex(tstate, c, &a->ob_mutex, &b->ob_mutex); } -#define PyCriticalSection2_Begin _PyCriticalSection2_Begin static inline void -_PyCriticalSection2_End(PyCriticalSection2 *c) +_PyCriticalSection2_End(PyThreadState *tstate, PyCriticalSection2 *c) { // if mutex1 is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for mutexes that are already held, @@ -207,9 +199,8 @@ _PyCriticalSection2_End(PyCriticalSection2 *c) PyMutex_Unlock(c->_cs_mutex2); } PyMutex_Unlock(c->_cs_base._cs_mutex); - _PyCriticalSection_Pop(&c->_cs_base); + _PyCriticalSection_Pop(tstate, &c->_cs_base); } -#define PyCriticalSection2_End _PyCriticalSection2_End static inline void _PyCriticalSection_AssertHeld(PyMutex *mutex) @@ -251,6 +242,45 @@ _PyCriticalSection_AssertHeldObj(PyObject *op) #endif } + +#undef Py_BEGIN_CRITICAL_SECTION +# define 
Py_BEGIN_CRITICAL_SECTION(op) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_Begin(_cs_tstate, &_py_cs, _PyObject_CAST(op)) + +#undef Py_BEGIN_CRITICAL_SECTION_MUTEX +# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_BeginMutex(_cs_tstate, &_py_cs, mutex) + +#undef Py_END_CRITICAL_SECTION +# define Py_END_CRITICAL_SECTION() \ + _PyCriticalSection_End(_cs_tstate, &_py_cs); \ + } + +#undef Py_BEGIN_CRITICAL_SECTION2 +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_Begin(_cs_tstate, &_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b)) + +#undef Py_BEGIN_CRITICAL_SECTION2_MUTEX +# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_BeginMutex(_cs_tstate, &_py_cs2, m1, m2) + +#undef Py_END_CRITICAL_SECTION2 +# define Py_END_CRITICAL_SECTION2() \ + _PyCriticalSection2_End(_cs_tstate, &_py_cs2); \ + } + #endif /* Py_GIL_DISABLED */ #ifdef __cplusplus diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index c3968aff8f3b8d..783747d1f01580 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -2070,6 +2070,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(take_bytes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(target_is_directory)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(task)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 4dd73291df4513..374617d8284b48 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -793,6 +793,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) STRUCT_FOR_ID(tag) + STRUCT_FOR_ID(take_bytes) STRUCT_FOR_ID(target) STRUCT_FOR_ID(target_is_directory) STRUCT_FOR_ID(task) diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index d64a18bb09e08f..4c8b8c0ed868d6 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -128,11 +128,18 @@ PyAPI_FUNC(int) _PyImport_ClearExtension(PyObject *name, PyObject *filename); // state of the module argument: // - If module is NULL or a PyModuleObject with md_gil == Py_MOD_GIL_NOT_USED, // call _PyEval_DisableGIL(). -// - Otherwise, call _PyEval_EnableGILPermanent(). If the GIL was not already -// enabled permanently, issue a warning referencing the module's name. +// - Otherwise, call _PyImport_EnableGILAndWarn // // This function may raise an exception. extern int _PyImport_CheckGILForModule(PyObject *module, PyObject *module_name); +// Assuming that the GIL is enabled from a call to +// _PyEval_EnableGILTransient(), call _PyEval_EnableGILPermanent(). +// If the GIL was not already enabled permanently, issue a warning referencing +// the module's name. +// Leave a message in verbose mode. +// +// This function may raise an exception. 
+extern int _PyImport_EnableGILAndWarn(PyThreadState *, PyObject *module_name); #endif #ifdef __cplusplus diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 368dafb90635d7..183b2d45c5ede1 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -153,10 +153,8 @@ typedef enum { } _PyConfigInitEnum; typedef enum { - /* For now, this means the GIL is enabled. - - gh-116329: This will eventually change to "the GIL is disabled but can - be re-enabled by loading an incompatible extension module." */ + /* In free threaded builds, this means that the GIL is disabled at startup, + but may be enabled by loading an incompatible extension module. */ _PyConfig_GIL_DEFAULT = -1, /* The GIL has been forced off or on, and will not be affected by module loading. */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f861d3abd96d48..6b3d5711b92971 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -179,6 +179,10 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; + // Duration of the collection in seconds: + double duration; }; /* Running stats per generation */ @@ -189,6 +193,10 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; + // Duration of the collection in seconds: + double duration; }; enum _GCPhase { diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 08f8d0e59d12e6..a66c97f7f13677 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ 
b/Include/internal/pycore_runtime_init_generated.h @@ -2068,6 +2068,7 @@ extern "C" { INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ INIT_ID(tag), \ + INIT_ID(take_bytes), \ INIT_ID(target), \ INIT_ID(target_is_directory), \ INIT_ID(task), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b1e57126b92d26..2061b1d204951d 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2952,6 +2952,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(take_bytes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(target); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Include/object.h b/Include/object.h index f17dcba4f476b0..ad452be8405671 100644 --- a/Include/object.h +++ b/Include/object.h @@ -140,12 +140,12 @@ struct _object { # endif }; #else - Py_ssize_t ob_refcnt; + Py_ssize_t ob_refcnt; // part of stable ABI; do not change #endif _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner; }; - PyTypeObject *ob_type; + PyTypeObject *ob_type; // part of stable ABI; do not change }; #else // Objects that are not owned by any thread use a thread id (tid) of zero. @@ -173,7 +173,7 @@ struct _object { #ifndef _Py_OPAQUE_PYOBJECT struct PyVarObject { PyObject ob_base; - Py_ssize_t ob_size; /* Number of items in variable part */ + Py_ssize_t ob_size; // Number of items in variable part. 
Part of stable ABI }; #endif typedef struct PyVarObject PyVarObject; @@ -265,56 +265,72 @@ _Py_IsOwnedByCurrentThread(PyObject *ob) } #endif -// Py_TYPE() implementation for the stable ABI +PyAPI_DATA(PyTypeObject) PyLong_Type; +PyAPI_DATA(PyTypeObject) PyBool_Type; + +/* Definitions for the stable ABI */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 14) PyAPI_FUNC(PyTypeObject*) Py_TYPE(PyObject *ob); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 15) +PyAPI_FUNC(Py_ssize_t) Py_SIZE(PyObject *ob); +PyAPI_FUNC(int) Py_IS_TYPE(PyObject *ob, PyTypeObject *type); +PyAPI_FUNC(void) Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size); +#endif + +#ifndef _Py_OPAQUE_PYOBJECT -#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030e0000 - // Stable ABI implements Py_TYPE() as a function call - // on limited C API version 3.14 and newer. +static inline void +Py_SET_TYPE(PyObject *ob, PyTypeObject *type) +{ + ob->ob_type = type; +} + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 11) +// Non-limited API & limited API 3.11 & below: use static inline functions and +// use _PyObject_CAST so that users don't need their own casts +# define Py_TYPE(ob) _Py_TYPE_impl(_PyObject_CAST(ob)) +# define Py_SIZE(ob) _Py_SIZE_impl(_PyObject_CAST(ob)) +# define Py_IS_TYPE(ob, type) _Py_IS_TYPE_impl(_PyObject_CAST(ob), (type)) +# define Py_SET_SIZE(ob, size) _Py_SET_SIZE_impl(_PyVarObject_CAST(ob), (size)) +# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) +#elif Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 15) +// Limited API 3.11-3.14: use static inline functions, without casts +# define Py_SIZE(ob) _Py_SIZE_impl(ob) +# define Py_IS_TYPE(ob, type) _Py_IS_TYPE_impl((ob), (type)) +# define Py_SET_SIZE(ob, size) _Py_SET_SIZE_impl((ob), (size)) +# if Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 14) +// Py_TYPE() is static inline only on Limited API 3.13 and below +# define Py_TYPE(ob) _Py_TYPE_impl(ob) +# 
endif #else - static inline PyTypeObject* _Py_TYPE(PyObject *ob) - { - return ob->ob_type; - } - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 - # define Py_TYPE(ob) _Py_TYPE(_PyObject_CAST(ob)) - #else - # define Py_TYPE(ob) _Py_TYPE(ob) - #endif +// Limited API 3.15+: use function calls #endif -PyAPI_DATA(PyTypeObject) PyLong_Type; -PyAPI_DATA(PyTypeObject) PyBool_Type; +static inline +PyTypeObject* _Py_TYPE_impl(PyObject *ob) +{ + return ob->ob_type; +} -#ifndef _Py_OPAQUE_PYOBJECT // bpo-39573: The Py_SET_SIZE() function must be used to set an object size. -static inline Py_ssize_t Py_SIZE(PyObject *ob) { +static inline Py_ssize_t +_Py_SIZE_impl(PyObject *ob) +{ assert(Py_TYPE(ob) != &PyLong_Type); assert(Py_TYPE(ob) != &PyBool_Type); return _PyVarObject_CAST(ob)->ob_size; } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SIZE(ob) Py_SIZE(_PyObject_CAST(ob)) -#endif -#endif // !defined(_Py_OPAQUE_PYOBJECT) -static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { +static inline int +_Py_IS_TYPE_impl(PyObject *ob, PyTypeObject *type) +{ return Py_TYPE(ob) == type; } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_IS_TYPE(ob, type) Py_IS_TYPE(_PyObject_CAST(ob), (type)) -#endif - -#ifndef _Py_OPAQUE_PYOBJECT -static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { - ob->ob_type = type; -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) -#endif - -static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { +static inline void +_Py_SET_SIZE_impl(PyVarObject *ob, Py_ssize_t size) +{ assert(Py_TYPE(_PyObject_CAST(ob)) != &PyLong_Type); assert(Py_TYPE(_PyObject_CAST(ob)) != &PyBool_Type); #ifdef Py_GIL_DISABLED @@ -323,9 +339,7 @@ static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { ob->ob_size = size; #endif } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 
-# define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size)) -#endif + #endif // !defined(_Py_OPAQUE_PYOBJECT) diff --git a/Include/patchlevel.h b/Include/patchlevel.h index e3996ee86793dd..804aa1a0427ba9 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -24,10 +24,10 @@ #define PY_MINOR_VERSION 15 #define PY_MICRO_VERSION 0 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_ALPHA -#define PY_RELEASE_SERIAL 1 +#define PY_RELEASE_SERIAL 2 /* Version as a string */ -#define PY_VERSION "3.15.0a1+" +#define PY_VERSION "3.15.0a2+" /*--end constants--*/ diff --git a/Include/pyport.h b/Include/pyport.h index b250f9e308f2dd..97c0e195d19808 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -504,6 +504,7 @@ extern "C" { * Thread support is stubbed and any attempt to create a new thread fails. */ #if (!defined(HAVE_PTHREAD_STUBS) && \ + !defined(__wasi__) && \ (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))) # define Py_CAN_START_THREADS 1 #endif diff --git a/InternalDocs/stack_protection.md b/InternalDocs/stack_protection.md index fa025bd930f74e..14802e57d095f4 100644 --- a/InternalDocs/stack_protection.md +++ b/InternalDocs/stack_protection.md @@ -38,12 +38,19 @@ Recursion checks are performed by `_Py_EnterRecursiveCall()` or `_Py_EnterRecurs ```python kb_used = (stack_top - stack_pointer)>>10 -if stack_pointer < hard_limit: +if stack_pointer < bottom_of_machine_stack: + pass # Our stack limits could be wrong so it is safest to do nothing. +elif stack_pointer < hard_limit: FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)") elif stack_pointer < soft_limit: raise RecursionError(f"Stack overflow (used {kb_used} kB)") ``` +### User space threads and other oddities + +Some libraries provide user-space threads. These will change the C stack at runtime. +To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit. 
+ ### Diagnosing and fixing stack overflows For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`. diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 57b712bc068d4e..29d7cc67b6e39d 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -1,4 +1,3 @@ -import io import os import sys @@ -169,6 +168,8 @@ class Argparse(ThemeSection): short_option: str = ANSIColors.BOLD_GREEN label: str = ANSIColors.BOLD_YELLOW action: str = ANSIColors.BOLD_GREEN + default: str = ANSIColors.GREY + default_value: str = ANSIColors.YELLOW reset: str = ANSIColors.RESET error: str = ANSIColors.BOLD_MAGENTA warning: str = ANSIColors.BOLD_YELLOW @@ -330,7 +331,7 @@ def _safe_getenv(k: str, fallback: str | None = None) -> str | None: try: return os.isatty(file.fileno()) - except io.UnsupportedOperation: + except OSError: return hasattr(file, "isatty") and file.isatty() diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index ff1bdab9fea078..3b0debf2ba037b 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -31,7 +31,6 @@ import sys import code import warnings -import errno from .readline import _get_reader, multiline_input, append_history_file diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 33907b1fc2a53a..a5788cdbfae3f5 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -844,14 +844,9 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): if not annotate.__closure__: return None, None - freevars = annotate.__code__.co_freevars new_closure = [] cell_dict = {} - for i, cell in enumerate(annotate.__closure__): - if i < len(freevars): - name = freevars[i] - else: - name = "__cell__" + for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True): cell_dict[name] = cell 
new_cell = None if allow_evaluation: diff --git a/Lib/argparse.py b/Lib/argparse.py index 6b79747572f48f..55ecdadd8c9398 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -748,7 +748,14 @@ def _get_help_string(self, action): if action.default is not SUPPRESS: defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] if action.option_strings or action.nargs in defaulting_nargs: - help += _(' (default: %(default)s)') + t = self._theme + default_str = _(" (default: %(default)s)") + prefix, suffix = default_str.split("%(default)s") + help += ( + f" {t.default}{prefix.lstrip()}" + f"{t.default_value}%(default)s" + f"{t.default}{suffix}{t.reset}" + ) return help @@ -932,15 +939,26 @@ def __init__(self, deprecated=False): _option_strings = [] + neg_option_strings = [] for option_string in option_strings: _option_strings.append(option_string) - if option_string.startswith('--'): - if option_string.startswith('--no-'): + if len(option_string) > 2 and option_string[0] == option_string[1]: + # two-dash long option: '--foo' -> '--no-foo' + if option_string.startswith('no-', 2): + raise ValueError(f'invalid option name {option_string!r} ' + f'for BooleanOptionalAction') + option_string = option_string[:2] + 'no-' + option_string[2:] + _option_strings.append(option_string) + neg_option_strings.append(option_string) + elif len(option_string) > 2 and option_string[0] != option_string[1]: + # single-dash long option: '-foo' -> '-nofoo' + if option_string.startswith('no', 1): raise ValueError(f'invalid option name {option_string!r} ' f'for BooleanOptionalAction') - option_string = '--no-' + option_string[2:] + option_string = option_string[:1] + 'no' + option_string[1:] _option_strings.append(option_string) + neg_option_strings.append(option_string) super().__init__( option_strings=_option_strings, @@ -950,11 +968,12 @@ def __init__(self, required=required, help=help, deprecated=deprecated) + self.neg_option_strings = neg_option_strings def __call__(self, parser, namespace, values, 
option_string=None): if option_string in self.option_strings: - setattr(namespace, self.dest, not option_string.startswith('--no-')) + setattr(namespace, self.dest, option_string not in self.neg_option_strings) def format_usage(self): return ' | '.join(self.option_strings) @@ -1660,29 +1679,35 @@ def _get_positional_kwargs(self, dest, **kwargs): def _get_optional_kwargs(self, *args, **kwargs): # determine short and long option strings option_strings = [] - long_option_strings = [] for option_string in args: # error on strings that don't start with an appropriate prefix if not option_string[0] in self.prefix_chars: raise ValueError( f'invalid option string {option_string!r}: ' f'must start with a character {self.prefix_chars!r}') - - # strings starting with two prefix characters are long options option_strings.append(option_string) - if len(option_string) > 1 and option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' dest = kwargs.pop('dest', None) if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) + priority = 0 + for option_string in option_strings: + if len(option_string) <= 2: + # short option: '-x' -> 'x' + if priority < 1: + dest = option_string.lstrip(self.prefix_chars) + priority = 1 + elif option_string[1] not in self.prefix_chars: + # single-dash long option: '-foo' -> 'foo' + if priority < 2: + dest = option_string.lstrip(self.prefix_chars) + priority = 2 + else: + # two-dash long option: '--foo' -> 'foo' + dest = option_string.lstrip(self.prefix_chars) + break if not dest: - msg = f'dest= is required for options like {option_string!r}' + msg = f'dest= is required for options like {repr(option_strings)[1:-1]}' raise TypeError(msg) dest = dest.replace('-', '_') diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py index 
59e22f523a85c5..d2db1a930c2ad2 100644 --- a/Lib/asyncio/streams.py +++ b/Lib/asyncio/streams.py @@ -667,8 +667,7 @@ async def readuntil(self, separator=b'\n'): # adds data which makes separator be found. That's why we check for # EOF *after* inspecting the buffer. if self._eof: - chunk = bytes(self._buffer) - self._buffer.clear() + chunk = self._buffer.take_bytes() raise exceptions.IncompleteReadError(chunk, None) # _wait_for_data() will resume reading if stream was paused. @@ -678,10 +677,9 @@ async def readuntil(self, separator=b'\n'): raise exceptions.LimitOverrunError( 'Separator is found, but chunk is longer than limit', match_start) - chunk = self._buffer[:match_end] - del self._buffer[:match_end] + chunk = self._buffer.take_bytes(match_end) self._maybe_resume_transport() - return bytes(chunk) + return chunk async def read(self, n=-1): """Read up to `n` bytes from the stream. @@ -716,20 +714,16 @@ async def read(self, n=-1): # collect everything in self._buffer, but that would # deadlock if the subprocess sends more than self.limit # bytes. So just call self.read(self._limit) until EOF. 
- blocks = [] - while True: - block = await self.read(self._limit) - if not block: - break - blocks.append(block) - return b''.join(blocks) + joined = bytearray() + while block := await self.read(self._limit): + joined += block + return joined.take_bytes() if not self._buffer and not self._eof: await self._wait_for_data('read') # This will work right even if buffer is less than n bytes - data = bytes(memoryview(self._buffer)[:n]) - del self._buffer[:n] + data = self._buffer.take_bytes(min(len(self._buffer), n)) self._maybe_resume_transport() return data @@ -760,18 +754,12 @@ async def readexactly(self, n): while len(self._buffer) < n: if self._eof: - incomplete = bytes(self._buffer) - self._buffer.clear() + incomplete = self._buffer.take_bytes() raise exceptions.IncompleteReadError(incomplete, n) await self._wait_for_data('readexactly') - if len(self._buffer) == n: - data = bytes(self._buffer) - self._buffer.clear() - else: - data = bytes(memoryview(self._buffer)[:n]) - del self._buffer[:n] + data = self._buffer.take_bytes(n) self._maybe_resume_transport() return data diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index f39e11fdd513b4..1d463ea09ba5b8 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,6 +1,6 @@ """Tools to analyze tasks running in asyncio programs.""" -from collections import defaultdict, namedtuple +from collections import defaultdict from itertools import count from enum import Enum import sys diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 25ac4d1d524bc2..55ffc36ea5b0c7 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1542,6 +1542,8 @@ def format_map(self, mapping): return self.data.format_map(mapping) def index(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.index(sub, start, end) def isalpha(self): @@ -1610,6 +1612,8 @@ def rfind(self, sub, start=0, end=_sys.maxsize): return self.data.rfind(sub, start, 
end) def rindex(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.rindex(sub, start, end) def rjust(self, width, *args): diff --git a/Lib/html/parser.py b/Lib/html/parser.py index e50620de800d63..80fb8c3f929f6b 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -24,6 +24,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +incomplete_charref = re.compile('&#(?:[0-9]|[xX][0-9a-fA-F])') attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') @@ -304,10 +305,20 @@ def goahead(self, end): k = k - 1 i = self.updatepos(i, k) continue + match = incomplete_charref.match(rawdata, i) + if match: + if end: + self.handle_charref(rawdata[i+2:]) + i = self.updatepos(i, n) + break + # incomplete + break + elif i + 3 < n: # larger than "&#x" + # not the end of the buffer, and can't be confused + # with some other construct + self.handle_data("&#") + i = self.updatepos(i, i + 2) else: - if ";" in rawdata[i:]: # bail by consuming &# - self.handle_data(rawdata[i:i+2]) - i = self.updatepos(i, i+2) break elif startswith('&', i): match = entityref.match(rawdata, i) @@ -321,15 +332,13 @@ def goahead(self, end): continue match = incomplete.match(rawdata, i) if match: - # match.group() will contain at least 2 chars - if end and match.group() == rawdata[i:]: - k = match.end() - if k <= i: - k = n - i = self.updatepos(i, i + 1) + if end: + self.handle_entityref(rawdata[i+1:]) + i = self.updatepos(i, n) + break # incomplete break - elif (i + 1) < n: + elif i + 1 < n: # not the end of the buffer, and can't be confused # with some other construct self.handle_data("&") diff --git a/Lib/idlelib/idle_test/test_iomenu.py b/Lib/idlelib/idle_test/test_iomenu.py index e0642cf0cabef0..9d7d1222938c4e 100644 --- a/Lib/idlelib/idle_test/test_iomenu.py +++ 
b/Lib/idlelib/idle_test/test_iomenu.py @@ -1,17 +1,20 @@ -"Test , coverage 17%." - -from idlelib import iomenu +import builtins +import os +import tempfile import unittest +from unittest.mock import patch + from test.support import requires from tkinter import Tk + +from idlelib import iomenu, util from idlelib.editor import EditorWindow -from idlelib import util from idlelib.idle_test.mock_idle import Func # Fail if either tokenize.open and t.detect_encoding does not exist. # These are used in loadfile and encode. # Also used in pyshell.MI.execfile and runscript.tabnanny. -from tokenize import open, detect_encoding +from tokenize import open as tokenize_open, detect_encoding # Remove when we have proper tests that use both. @@ -36,6 +39,14 @@ def tearDownClass(cls): cls.root.destroy() del cls.root + def _create_tempfile(self, content: str) -> str: + fd, filename = tempfile.mkstemp(suffix='.py') + os.close(fd) + self.addCleanup(os.unlink, filename) + with builtins.open(filename, 'w', encoding='utf-8') as f: + f.write(content) + return filename + def test_init(self): self.assertIs(self.io.editwin, self.editwin) @@ -45,17 +56,88 @@ def test_fixnewlines_end(self): fix = io.fixnewlines text = io.editwin.text - # Make the editor temporarily look like Shell. self.editwin.interp = None shelltext = '>>> if 1' self.editwin.get_prompt_text = Func(result=shelltext) - eq(fix(), shelltext) # Get... call and '\n' not added. + eq(fix(), shelltext) # Get... call and '\n' not added. 
del self.editwin.interp, self.editwin.get_prompt_text text.insert(1.0, 'a') - eq(fix(), 'a'+io.eol_convention) + eq(fix(), 'a' + io.eol_convention) eq(text.get('1.0', 'end-1c'), 'a\n') - eq(fix(), 'a'+io.eol_convention) + eq(fix(), 'a' + io.eol_convention) + + def test_reload_no_file(self): + io = self.io + io.filename = None + + with patch('idlelib.iomenu.messagebox.showinfo') as mock_showinfo: + result = io.reload(None) + self.assertEqual(result, "break") + mock_showinfo.assert_called_once() + args, kwargs = mock_showinfo.call_args + self.assertIn("File Not Found", args[0]) + + def test_reload_with_file(self): + io = self.io + text = io.editwin.text + original_content = "# Original content\n" + modified_content = "# Modified content\n" + + filename = self._create_tempfile(original_content) + io.filename = filename + + with patch('idlelib.iomenu.messagebox.showerror') as mock_showerror: + io.loadfile(io.filename) + self.assertEqual(text.get('1.0', 'end-1c'), original_content) + + with builtins.open(filename, 'w', encoding='utf-8') as f: + f.write(modified_content) + + result = io.reload(None) + + mock_showerror.assert_not_called() + self.assertEqual(result, "break") + self.assertEqual(text.get('1.0', 'end-1c'), modified_content) + + def test_reload_with_unsaved_changes_cancel(self): + io = self.io + text = io.editwin.text + original_content = "# Original content\n" + unsaved_content = original_content + "\n# Unsaved change" + + filename = self._create_tempfile(original_content) + io.filename = filename + io.loadfile(io.filename) + + text.insert('end', "\n# Unsaved change") + io.set_saved(False) + + with patch('idlelib.iomenu.messagebox.askokcancel', return_value=False) as mock_ask: + result = io.reload(None) + + self.assertEqual(result, "break") + self.assertEqual(text.get('1.0', 'end-1c'), unsaved_content) + mock_ask.assert_called_once() + + def test_reload_with_unsaved_changes_confirm(self): + io = self.io + text = io.editwin.text + original_content = "# 
Original content\n" + + filename = self._create_tempfile(original_content) + io.filename = filename + io.loadfile(io.filename) + + text.insert('end', "\n# Unsaved change") + io.set_saved(False) + + with patch('idlelib.iomenu.messagebox.askokcancel', return_value=True) as mock_ask: + result = io.reload(None) + + self.assertEqual(result, "break") + self.assertEqual(text.get('1.0', 'end-1c'), original_content) + mock_ask.assert_called_once() def _extension_in_filetypes(extension): diff --git a/Lib/idlelib/iomenu.py b/Lib/idlelib/iomenu.py index 464126e2df0668..7509bd36427992 100644 --- a/Lib/idlelib/iomenu.py +++ b/Lib/idlelib/iomenu.py @@ -31,6 +31,7 @@ def __init__(self, editwin): self.save_as) self.__id_savecopy = self.text.bind("<>", self.save_a_copy) + self.__id_reload = self.text.bind("<>", self.reload) self.fileencoding = 'utf-8' self.__id_print = self.text.bind("<>", self.print_window) @@ -40,6 +41,7 @@ def close(self): self.text.unbind("<>", self.__id_save) self.text.unbind("<>",self.__id_saveas) self.text.unbind("<>", self.__id_savecopy) + self.text.unbind("<>", self.__id_reload) self.text.unbind("<>", self.__id_print) # Break cycles self.editwin = None @@ -237,6 +239,35 @@ def save_a_copy(self, event): self.updaterecentfileslist(filename) return "break" + def reload(self, event): + """Reload the file from disk, discarding any unsaved changes. + + If the file has unsaved changes, ask the user to confirm. 
+ """ + if not self.filename: + messagebox.showinfo( + "File Not Found", + "This window has no associated file to reload.", + parent=self.text) + self.text.focus_set() + return "break" + + if not self.get_saved(): + confirm = messagebox.askokcancel( + title="Reload File", + message=f"Discard changes to {self.filename}?", + default=messagebox.CANCEL, + parent=self.text) + if not confirm: + self.text.focus_set() + return "break" + + # Reload the file + self.loadfile(self.filename) + + self.text.focus_set() + return "break" + def writefile(self, filename): text = self.fixnewlines() chars = self.encode(text) diff --git a/Lib/idlelib/mainmenu.py b/Lib/idlelib/mainmenu.py index 91a32cebb513f9..6162b07a33a65e 100644 --- a/Lib/idlelib/mainmenu.py +++ b/Lib/idlelib/mainmenu.py @@ -31,6 +31,7 @@ ('_Save', '<>'), ('Save _As...', '<>'), ('Save Cop_y As...', '<>'), + ('_Reload from Disk', '<>'), None, ('Prin_t Window', '<>'), None, diff --git a/Lib/pdb.py b/Lib/pdb.py index 76bb28d7396452..60b713ebaf3d1a 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -346,8 +346,8 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, bdb.Bdb.__init__(self, skip=skip, backend=backend if backend else get_default_backend()) cmd.Cmd.__init__(self, completekey, stdin, stdout) sys.audit("pdb.Pdb") - if stdout: - self.use_rawinput = 0 + if stdin: + self.use_rawinput = False self.prompt = '(Pdb) ' self.aliases = {} self.displaying = {} @@ -654,7 +654,7 @@ def _show_display(self): def _get_tb_and_exceptions(self, tb_or_exc): """ - Given a tracecack or an exception, return a tuple of chained exceptions + Given a traceback or an exception, return a tuple of chained exceptions and current traceback to inspect. 
This will deal with selecting the right ``__cause__`` or ``__context__`` @@ -2429,7 +2429,9 @@ def print_stack_trace(self, count=None): except KeyboardInterrupt: pass - def print_stack_entry(self, frame_lineno, prompt_prefix=line_prefix): + def print_stack_entry(self, frame_lineno, prompt_prefix=None): + if prompt_prefix is None: + prompt_prefix = line_prefix frame, lineno = frame_lineno if frame is self.curframe: prefix = '> ' diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 254b6c7fcc9dd2..29baf3be7ebb6e 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2839,7 +2839,7 @@ def __init__(self, value): } -if __name__ == "__main__": +def _main(args=None): import argparse parser = argparse.ArgumentParser( description='disassemble one or more pickle files', @@ -2864,7 +2864,7 @@ def __init__(self, value): '-p', '--preamble', default="==> {name} <==", help='if more than one pickle file is specified, print this before' ' each disassembly') - args = parser.parse_args() + args = parser.parse_args(args) annotate = 30 if args.annotate else 0 memo = {} if args.memo else None if args.output is None: @@ -2885,3 +2885,7 @@ def __init__(self, value): finally: if output is not sys.stdout: output.close() + + +if __name__ == "__main__": + _main() diff --git a/Lib/platform.py b/Lib/platform.py index 4db93bea2a39e1..b5017dbdb02252 100644 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -197,7 +197,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384): | (GLIBC_([0-9.]+)) | (libc(_\w+)?\.so(?:\.(\d[0-9.]*))?) | (musl-([0-9.]+)) - | (libc.musl(?:-\w+)?.so(?:\.(\d[0-9.]*))?) + | ((?:libc\.|ld-)musl(?:-\w+)?.so(?:\.(\d[0-9.]*))?) 
""", re.ASCII | re.VERBOSE) @@ -236,7 +236,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384): elif V(glibcversion) > V(ver): ver = glibcversion elif so: - if lib != 'glibc': + if lib not in ('glibc', 'musl'): lib = 'libc' if soversion and (not ver or V(soversion) > V(ver)): ver = soversion diff --git a/Lib/profiling/sampling/__main__.py b/Lib/profiling/sampling/__main__.py index cd1425b8b9c7d3..47bd3a0113eb3d 100644 --- a/Lib/profiling/sampling/__main__.py +++ b/Lib/profiling/sampling/__main__.py @@ -45,7 +45,7 @@ system restrictions or missing privileges. """ -from .sample import main +from .cli import main def handle_permission_error(): """Handle PermissionError by displaying appropriate error message.""" diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py new file mode 100644 index 00000000000000..aede6a4d3e9f1b --- /dev/null +++ b/Lib/profiling/sampling/cli.py @@ -0,0 +1,705 @@ +"""Command-line interface for the sampling profiler.""" + +import argparse +import os +import socket +import subprocess +import sys + +from .sample import sample, sample_live +from .pstats_collector import PstatsCollector +from .stack_collector import CollapsedStackCollector, FlamegraphCollector +from .gecko_collector import GeckoCollector +from .constants import ( + PROFILING_MODE_ALL, + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + SORT_MODE_NSAMPLES, + SORT_MODE_TOTTIME, + SORT_MODE_CUMTIME, + SORT_MODE_SAMPLE_PCT, + SORT_MODE_CUMUL_PCT, + SORT_MODE_NSAMPLES_CUMUL, +) + +try: + from .live_collector import LiveStatsCollector +except ImportError: + LiveStatsCollector = None + + +class CustomFormatter( + argparse.ArgumentDefaultsHelpFormatter, + argparse.RawDescriptionHelpFormatter, +): + """Custom formatter that combines default values display with raw description formatting.""" + pass + + +_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. 
+ +Commands: + run Run and profile a script or module + attach Attach to and profile a running process + +Examples: + # Run and profile a script + python -m profiling.sampling run script.py arg1 arg2 + + # Attach to a running process + python -m profiling.sampling attach 1234 + + # Live interactive mode for a script + python -m profiling.sampling run --live script.py + + # Live interactive mode for a running process + python -m profiling.sampling attach --live 1234 + +Use 'python -m profiling.sampling --help' for command-specific help.""" + + +# Constants for socket synchronization +_SYNC_TIMEOUT = 5.0 +_PROCESS_KILL_TIMEOUT = 2.0 +_READY_MESSAGE = b"ready" +_RECV_BUFFER_SIZE = 1024 + +# Format configuration +FORMAT_EXTENSIONS = { + "pstats": "pstats", + "collapsed": "txt", + "flamegraph": "html", + "gecko": "json", +} + +COLLECTOR_MAP = { + "pstats": PstatsCollector, + "collapsed": CollapsedStackCollector, + "flamegraph": FlamegraphCollector, + "gecko": GeckoCollector, +} + + +def _parse_mode(mode_string): + """Convert mode string to mode constant.""" + mode_map = { + "wall": PROFILING_MODE_WALL, + "cpu": PROFILING_MODE_CPU, + "gil": PROFILING_MODE_GIL, + } + return mode_map[mode_string] + + +def _run_with_sync(original_cmd, suppress_output=False): + """Run a command with socket-based synchronization and return the process.""" + # Create a TCP socket for synchronization with better socket options + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock: + # Set SO_REUSEADDR to avoid "Address already in use" errors + sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port + sync_port = sync_sock.getsockname()[1] + sync_sock.listen(1) + sync_sock.settimeout(_SYNC_TIMEOUT) + + # Get current working directory to preserve it + cwd = os.getcwd() + + # Build command using the sync coordinator + target_args = original_cmd[1:] # Remove python executable + cmd = ( + sys.executable, + "-m", + 
"profiling.sampling._sync_coordinator", + str(sync_port), + cwd, + ) + tuple(target_args) + + # Start the process with coordinator + # Suppress stdout/stderr if requested (for live mode) + popen_kwargs = {} + if suppress_output: + popen_kwargs["stdin"] = subprocess.DEVNULL + popen_kwargs["stdout"] = subprocess.DEVNULL + popen_kwargs["stderr"] = subprocess.DEVNULL + + process = subprocess.Popen(cmd, **popen_kwargs) + + try: + # Wait for ready signal with timeout + with sync_sock.accept()[0] as conn: + ready_signal = conn.recv(_RECV_BUFFER_SIZE) + + if ready_signal != _READY_MESSAGE: + raise RuntimeError( + f"Invalid ready signal received: {ready_signal!r}" + ) + + except socket.timeout: + # If we timeout, kill the process and raise an error + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + raise RuntimeError( + "Process failed to signal readiness within timeout" + ) + + return process + + +def _add_sampling_options(parser): + """Add sampling configuration options to a parser.""" + sampling_group = parser.add_argument_group("Sampling configuration") + sampling_group.add_argument( + "-i", + "--interval", + type=int, + default=100, + metavar="MICROSECONDS", + help="sampling interval", + ) + sampling_group.add_argument( + "-d", + "--duration", + type=int, + default=10, + metavar="SECONDS", + help="Sampling duration", + ) + sampling_group.add_argument( + "-a", + "--all-threads", + action="store_true", + help="Sample all threads in the process instead of just the main thread", + ) + sampling_group.add_argument( + "--realtime-stats", + action="store_true", + help="Print real-time sampling statistics (Hz, mean, min, max) during profiling", + ) + sampling_group.add_argument( + "--native", + action="store_true", + help='Include artificial "" frames to denote calls to non-Python code', + ) + sampling_group.add_argument( + "--no-gc", + 
action="store_false", + dest="gc", + help='Don\'t include artificial "" frames to denote active garbage collection', + ) + + +def _add_mode_options(parser): + """Add mode options to a parser.""" + mode_group = parser.add_argument_group("Mode options") + mode_group.add_argument( + "--mode", + choices=["wall", "cpu", "gil"], + default="wall", + help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), " + "gil (only samples when thread holds the GIL)", + ) + + +def _add_format_options(parser): + """Add output format options to a parser.""" + output_group = parser.add_argument_group("Output options") + format_group = output_group.add_mutually_exclusive_group() + format_group.add_argument( + "--pstats", + action="store_const", + const="pstats", + dest="format", + help="Generate pstats output (default)", + ) + format_group.add_argument( + "--collapsed", + action="store_const", + const="collapsed", + dest="format", + help="Generate collapsed stack traces for flamegraphs", + ) + format_group.add_argument( + "--flamegraph", + action="store_const", + const="flamegraph", + dest="format", + help="Generate interactive HTML flamegraph visualization", + ) + format_group.add_argument( + "--gecko", + action="store_const", + const="gecko", + dest="format", + help="Generate Gecko format for Firefox Profiler", + ) + parser.set_defaults(format="pstats") + + output_group.add_argument( + "-o", + "--output", + dest="outfile", + help="Save output to a file (default: stdout for pstats, " + "auto-generated filename for other formats)", + ) + + +def _add_pstats_options(parser): + """Add pstats-specific display options to a parser.""" + pstats_group = parser.add_argument_group("pstats format options") + pstats_group.add_argument( + "--sort", + choices=[ + "nsamples", + "tottime", + "cumtime", + "sample-pct", + "cumul-pct", + "nsamples-cumul", + "name", + ], + default=None, + help="Sort order for pstats output (default: nsamples)", + ) + pstats_group.add_argument( + 
"-l", + "--limit", + type=int, + default=None, + help="Limit the number of rows in the output (default: 15)", + ) + pstats_group.add_argument( + "--no-summary", + action="store_true", + help="Disable the summary section in the pstats output", + ) + + +def _sort_to_mode(sort_choice): + """Convert sort choice string to SORT_MODE constant.""" + sort_map = { + "nsamples": SORT_MODE_NSAMPLES, + "tottime": SORT_MODE_TOTTIME, + "cumtime": SORT_MODE_CUMTIME, + "sample-pct": SORT_MODE_SAMPLE_PCT, + "cumul-pct": SORT_MODE_CUMUL_PCT, + "nsamples-cumul": SORT_MODE_NSAMPLES_CUMUL, + "name": -1, + } + return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) + + +def _create_collector(format_type, interval, skip_idle): + """Create the appropriate collector based on format type. + + Args: + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko') + interval: Sampling interval in microseconds + skip_idle: Whether to skip idle samples + + Returns: + A collector instance of the appropriate type + """ + collector_class = COLLECTOR_MAP.get(format_type) + if collector_class is None: + raise ValueError(f"Unknown format: {format_type}") + + # Gecko format never skips idle (it needs both GIL and CPU data) + if format_type == "gecko": + skip_idle = False + + return collector_class(interval, skip_idle=skip_idle) + + +def _generate_output_filename(format_type, pid): + """Generate output filename based on format and PID. + + Args: + format_type: The output format + pid: Process ID + + Returns: + Generated filename + """ + extension = FORMAT_EXTENSIONS.get(format_type, "txt") + return f"{format_type}.{pid}.{extension}" + + +def _handle_output(collector, args, pid, mode): + """Handle output for the collector based on format and arguments. 
+ + Args: + collector: The collector instance with profiling data + args: Parsed command-line arguments + pid: Process ID (for generating filenames) + mode: Profiling mode used + """ + if args.format == "pstats": + if args.outfile: + collector.export(args.outfile) + else: + # Print to stdout with defaults applied + sort_choice = args.sort if args.sort is not None else "nsamples" + limit = args.limit if args.limit is not None else 15 + sort_mode = _sort_to_mode(sort_choice) + collector.print_stats( + sort_mode, limit, not args.no_summary, mode + ) + else: + # Export to file + filename = args.outfile or _generate_output_filename(args.format, pid) + collector.export(filename) + + +def _validate_args(args, parser): + """Validate format-specific options and live mode requirements. + + Args: + args: Parsed command-line arguments + parser: ArgumentParser instance for error reporting + """ + # Check if live mode is available + if hasattr(args, 'live') and args.live and LiveStatsCollector is None: + parser.error( + "Live mode requires the curses module, which is not available." + ) + + # Live mode is incompatible with format options + if hasattr(args, 'live') and args.live: + if args.format != "pstats": + format_flag = f"--{args.format}" + parser.error( + f"--live is incompatible with {format_flag}. Live mode uses a TUI interface." + ) + + # Live mode is also incompatible with pstats-specific options + issues = [] + if args.sort is not None: + issues.append("--sort") + if args.limit is not None: + issues.append("--limit") + if args.no_summary: + issues.append("--no-summary") + + if issues: + parser.error( + f"Options {', '.join(issues)} are incompatible with --live. " + "Live mode uses a TUI interface with its own controls." + ) + return + + # Validate gecko mode doesn't use non-wall mode + if args.format == "gecko" and args.mode != "wall": + parser.error( + "--mode option is incompatible with --gecko. 
" + "Gecko format automatically includes both GIL-holding and CPU status analysis." + ) + + # Validate pstats-specific options are only used with pstats format + if args.format != "pstats": + issues = [] + if args.sort is not None: + issues.append("--sort") + if args.limit is not None: + issues.append("--limit") + if args.no_summary: + issues.append("--no-summary") + + if issues: + format_flag = f"--{args.format}" + parser.error( + f"Options {', '.join(issues)} are only valid with --pstats, not {format_flag}" + ) + + +def main(): + """Main entry point for the CLI.""" + # Create the main parser + parser = argparse.ArgumentParser( + description=_HELP_DESCRIPTION, + formatter_class=CustomFormatter, + ) + + # Create subparsers for commands + subparsers = parser.add_subparsers( + dest="command", required=True, help="Command to run" + ) + + # === RUN COMMAND === + run_parser = subparsers.add_parser( + "run", + help="Run and profile a script or module", + formatter_class=CustomFormatter, + description="""Run and profile a Python script or module + +Examples: + # Run and profile a module + python -m profiling.sampling run -m mymodule arg1 arg2 + + # Generate flamegraph from a script + python -m profiling.sampling run --flamegraph -o output.html script.py + + # Profile with custom interval and duration + python -m profiling.sampling run -i 50 -d 30 script.py + + # Save collapsed stacks to file + python -m profiling.sampling run --collapsed -o stacks.txt script.py + + # Live interactive mode for a script + python -m profiling.sampling run --live script.py""", + ) + run_parser.add_argument( + "-m", + "--module", + action="store_true", + help="Run target as a module (like python -m)", + ) + run_parser.add_argument( + "target", + help="Script file or module name to profile", + ) + run_parser.add_argument( + "args", + nargs=argparse.REMAINDER, + help="Arguments to pass to the script or module", + ) + run_parser.add_argument( + "--live", + action="store_true", + help="Interactive 
TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)", + ) + _add_sampling_options(run_parser) + _add_mode_options(run_parser) + _add_format_options(run_parser) + _add_pstats_options(run_parser) + + # === ATTACH COMMAND === + attach_parser = subparsers.add_parser( + "attach", + help="Attach to and profile a running process", + formatter_class=CustomFormatter, + description="""Attach to a running process and profile it + +Examples: + # Profile all threads, sort by total time + python -m profiling.sampling attach -a --sort tottime 1234 + + # Live interactive mode for a running process + python -m profiling.sampling attach --live 1234""", + ) + attach_parser.add_argument( + "pid", + type=int, + help="Process ID to attach to", + ) + attach_parser.add_argument( + "--live", + action="store_true", + help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)", + ) + _add_sampling_options(attach_parser) + _add_mode_options(attach_parser) + _add_format_options(attach_parser) + _add_pstats_options(attach_parser) + + # Parse arguments + args = parser.parse_args() + + # Validate arguments + _validate_args(args, parser) + + # Command dispatch table + command_handlers = { + "run": _handle_run, + "attach": _handle_attach, + } + + # Execute the appropriate command + handler = command_handlers.get(args.command) + if handler: + handler(args) + else: + parser.error(f"Unknown command: {args.command}") + + +def _handle_attach(args): + """Handle the 'attach' command.""" + # Check if live mode is requested + if args.live: + _handle_live_attach(args, args.pid) + return + + # Use PROFILING_MODE_ALL for gecko format + mode = ( + PROFILING_MODE_ALL + if args.format == "gecko" + else _parse_mode(args.mode) + ) + + # Determine skip_idle based on mode + skip_idle = ( + mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False + ) + + # Create the appropriate collector + collector = _create_collector(args.format, args.interval, 
skip_idle) + + # Sample the process + collector = sample( + args.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + + # Handle output + _handle_output(collector, args, args.pid, mode) + + +def _handle_run(args): + """Handle the 'run' command.""" + # Check if live mode is requested + if args.live: + _handle_live_run(args) + return + + # Build the command to run + if args.module: + cmd = (sys.executable, "-m", args.target, *args.args) + else: + cmd = (sys.executable, args.target, *args.args) + + # Run with synchronization + process = _run_with_sync(cmd, suppress_output=False) + + # Use PROFILING_MODE_ALL for gecko format + mode = ( + PROFILING_MODE_ALL + if args.format == "gecko" + else _parse_mode(args.mode) + ) + + # Determine skip_idle based on mode + skip_idle = ( + mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False + ) + + # Create the appropriate collector + collector = _create_collector(args.format, args.interval, skip_idle) + + # Profile the subprocess + try: + collector = sample( + process.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + + # Handle output + _handle_output(collector, args, process.pid, mode) + finally: + # Clean up the subprocess + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +def _handle_live_attach(args, pid): + """Handle live mode for an existing process.""" + mode = _parse_mode(args.mode) + + # Determine skip_idle based on mode + skip_idle = mode != PROFILING_MODE_WALL + + # Create live collector with default settings + collector = LiveStatsCollector( + args.interval, + skip_idle=skip_idle, + sort_by="tottime", # Default initial sort + limit=20, # 
Default limit + pid=pid, + mode=mode, + ) + + # Sample in live mode + sample_live( + pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + + +def _handle_live_run(args): + """Handle live mode for running a script/module.""" + # Build the command to run + if args.module: + cmd = (sys.executable, "-m", args.target, *args.args) + else: + cmd = (sys.executable, args.target, *args.args) + + # Run with synchronization, suppressing output for live mode + process = _run_with_sync(cmd, suppress_output=True) + + mode = _parse_mode(args.mode) + + # Determine skip_idle based on mode + skip_idle = mode != PROFILING_MODE_WALL + + # Create live collector with default settings + collector = LiveStatsCollector( + args.interval, + skip_idle=skip_idle, + sort_by="tottime", # Default initial sort + limit=20, # Default limit + pid=process.pid, + mode=mode, + ) + + # Profile the subprocess in live mode + try: + sample_live( + process.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + finally: + # Clean up the subprocess + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +if __name__ == "__main__": + main() diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 3c2325ef77268c..27d40156d1f6a0 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,20 +1,19 @@ from abc import ABC, abstractmethod - -# Thread status flags -try: - from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED -except ImportError: - # Fallback for tests or when module is not available - THREAD_STATUS_HAS_GIL = 
(1 << 0) - THREAD_STATUS_ON_CPU = (1 << 1) - THREAD_STATUS_UNKNOWN = (1 << 2) - THREAD_STATUS_GIL_REQUESTED = (1 << 3) +from .constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, +) class Collector(ABC): @abstractmethod def collect(self, stack_frames): """Collect profiling data from stack frames.""" + def collect_failed_sample(self): + """Collect data about a failed sample attempt.""" + @abstractmethod def export(self, filename): """Export collected data to a file.""" diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py new file mode 100644 index 00000000000000..be2ae60a88f114 --- /dev/null +++ b/Lib/profiling/sampling/constants.py @@ -0,0 +1,30 @@ +"""Constants for the sampling profiler.""" + +# Profiling mode constants +PROFILING_MODE_WALL = 0 +PROFILING_MODE_CPU = 1 +PROFILING_MODE_GIL = 2 +PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks + +# Sort mode constants +SORT_MODE_NSAMPLES = 0 +SORT_MODE_TOTTIME = 1 +SORT_MODE_CUMTIME = 2 +SORT_MODE_SAMPLE_PCT = 3 +SORT_MODE_CUMUL_PCT = 4 +SORT_MODE_NSAMPLES_CUMUL = 5 + +# Thread status flags +try: + from _remote_debugging import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + ) +except ImportError: + # Fallback for tests or when module is not available + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_UNKNOWN = (1 << 2) + THREAD_STATUS_GIL_REQUESTED = (1 << 3) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 21c427b7c862a4..921cd625f04e3f 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -56,7 +56,8 @@ class GeckoCollector(Collector): - def __init__(self, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False): + self.sample_interval_usec = sample_interval_usec self.skip_idle = 
skip_idle self.start_time = time.time() * 1000 # milliseconds since epoch diff --git a/Lib/profiling/sampling/live_collector/__init__.py b/Lib/profiling/sampling/live_collector/__init__.py new file mode 100644 index 00000000000000..175e4610d232c5 --- /dev/null +++ b/Lib/profiling/sampling/live_collector/__init__.py @@ -0,0 +1,200 @@ +"""Live profiling collector that displays top-like statistics using curses. + + ┌─────────────────────────────┐ + │ Target Python Process │ + │ (being profiled) │ + └──────────────┬──────────────┘ + │ Stack sampling at + │ configured interval + │ (e.g., 10000µs) + ▼ + ┌─────────────────────────────┐ + │ LiveStatsCollector │ + │ ┌───────────────────────┐ │ + │ │ collect() │ │ Aggregates samples + │ │ - Iterates frames │ │ into statistics + │ │ - Updates counters │ │ + │ └───────────┬───────────┘ │ + │ │ │ + │ ▼ │ + │ ┌───────────────────────┐ │ + │ │ Data Storage │ │ + │ │ - result dict │ │ Tracks per-function: + │ │ - direct_calls │ │ • Direct samples + │ │ - cumulative_calls │ │ • Cumulative samples + │ └───────────┬───────────┘ │ • Derived time stats + │ │ │ + │ ▼ │ + │ ┌───────────────────────┐ │ + │ │ Display Update │ │ + │ │ (10Hz by default) │ │ Rate-limited refresh + │ └───────────┬───────────┘ │ + └──────────────┼──────────────┘ + │ + ▼ + ┌─────────────────────────────┐ + │ DisplayInterface │ + │ (Abstract layer) │ + └──────────────┬──────────────┘ + ┌───────┴────────┐ + │ │ + ┌──────────▼────────┐ ┌───▼──────────┐ + │ CursesDisplay │ │ MockDisplay │ + │ - Real terminal │ │ - Testing │ + │ - ncurses backend │ │ - No UI │ + └─────────┬─────────┘ └──────────────┘ + │ + ▼ + ┌─────────────────────────────────────┐ + │ Widget-Based Rendering │ + │ ┌─────────────────────────────────┐ │ + │ │ HeaderWidget │ │ + │ │ • PID, uptime, time, interval │ │ + │ │ • Sample stats & progress bar │ │ + │ │ • Efficiency bar │ │ + │ │ • Thread status & GC stats │ │ + │ │ • Function summary │ │ + │ │ • Top 3 hottest functions │ │ + │ 
├─────────────────────────────────┤ │ + │ │ TableWidget │ │ + │ │ • Column headers (sortable) │ │ Interactive display + │ │ • Stats rows (scrolling) │ │ with keyboard controls: + │ │ - nsamples % time │ │ s: sort, p: pause + │ │ - function file:line │ │ r: reset, /: filter + │ ├─────────────────────────────────┤ │ q: quit, h: help + │ │ FooterWidget │ │ + │ │ • Legend and status │ │ + │ │ • Filter input prompt │ │ + │ └─────────────────────────────────┘ │ + └─────────────────────────────────────┘ + +Architecture: + +The live collector is organized into four layers. The data collection layer +(LiveStatsCollector) aggregates stack samples into per-function statistics without +any knowledge of how they will be presented. The display abstraction layer +(DisplayInterface) defines rendering operations without coupling to curses or any +specific UI framework. The widget layer (Widget, HeaderWidget, TableWidget, +FooterWidget, HelpWidget, ProgressBarWidget) encapsulates individual UI components +with their own rendering logic, promoting modularity and reusability. The +presentation layer (CursesDisplay/MockDisplay) implements the actual rendering for +terminal output and testing. + +The system runs two independent update loops. The sampling loop is driven by the +profiler at the configured interval (e.g., 10000µs) and continuously collects +stack frames and updates statistics. The display loop runs at a fixed refresh rate +(default 10Hz) and updates the terminal independently of sampling frequency. This +separation allows high-frequency sampling without overwhelming the terminal with +constant redraws. + +Statistics are computed incrementally as samples arrive. The collector maintains +running counters (direct calls and cumulative calls) in a dictionary keyed by +function location. 
Derived metrics like time estimates and percentages are computed +on-demand during display updates rather than being stored, which minimizes memory +overhead as the number of tracked functions grows. + +User input is processed asynchronously during display updates using non-blocking I/O. +This allows interactive controls (sorting, filtering, pausing) without interrupting +the data collection pipeline. The collector maintains mode flags (paused, +filter_input_mode) that affect what gets displayed but not what gets collected. + +""" + +# Re-export all public classes and constants for backward compatibility +from .collector import LiveStatsCollector +from .display import DisplayInterface, CursesDisplay, MockDisplay +from .widgets import ( + Widget, + ProgressBarWidget, + HeaderWidget, + TableWidget, + FooterWidget, + HelpWidget, +) +from .constants import ( + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_HZ, + DISPLAY_UPDATE_INTERVAL, + MIN_TERMINAL_WIDTH, + MIN_TERMINAL_HEIGHT, + WIDTH_THRESHOLD_SAMPLE_PCT, + WIDTH_THRESHOLD_TOTTIME, + WIDTH_THRESHOLD_CUMUL_PCT, + WIDTH_THRESHOLD_CUMTIME, + HEADER_LINES, + FOOTER_LINES, + SAFETY_MARGIN, + TOP_FUNCTIONS_DISPLAY_COUNT, + COL_WIDTH_NSAMPLES, + COL_SPACING, + COL_WIDTH_SAMPLE_PCT, + COL_WIDTH_TIME, + MIN_FUNC_NAME_WIDTH, + MAX_FUNC_NAME_WIDTH, + MIN_AVAILABLE_SPACE, + MIN_BAR_WIDTH, + MAX_SAMPLE_RATE_BAR_WIDTH, + MAX_EFFICIENCY_BAR_WIDTH, + MIN_SAMPLE_RATE_FOR_SCALING, + FINISHED_BANNER_EXTRA_LINES, + COLOR_PAIR_HEADER_BG, + COLOR_PAIR_CYAN, + COLOR_PAIR_YELLOW, + COLOR_PAIR_GREEN, + COLOR_PAIR_MAGENTA, + COLOR_PAIR_RED, + COLOR_PAIR_SORTED_HEADER, + DEFAULT_SORT_BY, + DEFAULT_DISPLAY_LIMIT, +) + +__all__ = [ + # Main collector + "LiveStatsCollector", + # Display interfaces + "DisplayInterface", + "CursesDisplay", + "MockDisplay", + # Widgets + "Widget", + "ProgressBarWidget", + "HeaderWidget", + "TableWidget", + "FooterWidget", + "HelpWidget", + # Constants + "MICROSECONDS_PER_SECOND", + "DISPLAY_UPDATE_HZ", + 
"DISPLAY_UPDATE_INTERVAL", + "MIN_TERMINAL_WIDTH", + "MIN_TERMINAL_HEIGHT", + "WIDTH_THRESHOLD_SAMPLE_PCT", + "WIDTH_THRESHOLD_TOTTIME", + "WIDTH_THRESHOLD_CUMUL_PCT", + "WIDTH_THRESHOLD_CUMTIME", + "HEADER_LINES", + "FOOTER_LINES", + "SAFETY_MARGIN", + "TOP_FUNCTIONS_DISPLAY_COUNT", + "COL_WIDTH_NSAMPLES", + "COL_SPACING", + "COL_WIDTH_SAMPLE_PCT", + "COL_WIDTH_TIME", + "MIN_FUNC_NAME_WIDTH", + "MAX_FUNC_NAME_WIDTH", + "MIN_AVAILABLE_SPACE", + "MIN_BAR_WIDTH", + "MAX_SAMPLE_RATE_BAR_WIDTH", + "MAX_EFFICIENCY_BAR_WIDTH", + "MIN_SAMPLE_RATE_FOR_SCALING", + "FINISHED_BANNER_EXTRA_LINES", + "COLOR_PAIR_HEADER_BG", + "COLOR_PAIR_CYAN", + "COLOR_PAIR_YELLOW", + "COLOR_PAIR_GREEN", + "COLOR_PAIR_MAGENTA", + "COLOR_PAIR_RED", + "COLOR_PAIR_SORTED_HEADER", + "DEFAULT_SORT_BY", + "DEFAULT_DISPLAY_LIMIT", +] diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py new file mode 100644 index 00000000000000..4b69275a2f077f --- /dev/null +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -0,0 +1,1015 @@ +"""LiveStatsCollector - Main collector class for live profiling.""" + +import collections +import contextlib +import curses +from dataclasses import dataclass, field +import os +import site +import sys +import sysconfig +import time +import _colorize + +from ..collector import Collector +from ..constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_WALL, +) +from .constants import ( + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_INTERVAL, + MIN_TERMINAL_WIDTH, + MIN_TERMINAL_HEIGHT, + HEADER_LINES, + FOOTER_LINES, + SAFETY_MARGIN, + FINISHED_BANNER_EXTRA_LINES, + DEFAULT_SORT_BY, + DEFAULT_DISPLAY_LIMIT, + COLOR_PAIR_HEADER_BG, + COLOR_PAIR_CYAN, + COLOR_PAIR_YELLOW, + COLOR_PAIR_GREEN, + COLOR_PAIR_MAGENTA, + COLOR_PAIR_RED, + COLOR_PAIR_SORTED_HEADER, +) +from .display import 
@dataclass
class ThreadData:
    """Profiling state kept for one thread: call counts and status tallies."""

    thread_id: int

    # Call statistics keyed by location; a location that has not been seen
    # yet starts with both counters at zero:
    #   {location: {"direct_calls": int, "cumulative_calls": int}}
    result: dict = field(
        default_factory=lambda: collections.defaultdict(
            lambda: {"direct_calls": 0, "cumulative_calls": 0}
        )
    )

    # How many status samples had each flag set
    has_gil: int = 0
    on_cpu: int = 0
    gil_requested: int = 0
    unknown: int = 0
    total: int = 0  # Status samples recorded for this thread

    # Sample counts
    sample_count: int = 0
    gc_frame_samples: int = 0

    def increment_status_flag(self, status_flags):
        """Record one status sample given its bit-flag value."""
        flag_counters = (
            (THREAD_STATUS_HAS_GIL, "has_gil"),
            (THREAD_STATUS_ON_CPU, "on_cpu"),
            (THREAD_STATUS_GIL_REQUESTED, "gil_requested"),
            (THREAD_STATUS_UNKNOWN, "unknown"),
        )
        for flag, counter_name in flag_counters:
            if status_flags & flag:
                setattr(self, counter_name, getattr(self, counter_name) + 1)
        # Every call counts as one sample, whichever flags were set.
        self.total += 1

    def as_status_dict(self):
        """Return the status tallies as a plain dict."""
        names = ("has_gil", "on_cpu", "gil_requested", "unknown", "total")
        return {name: getattr(self, name) for name in names}
+ + Args: + sample_interval_usec: Sampling interval in microseconds + skip_idle: Whether to skip idle threads + sort_by: Sort key ('tottime', 'nsamples', 'cumtime', 'sample_pct', 'cumul_pct') + limit: Maximum number of functions to display + pid: Process ID being profiled + display: DisplayInterface implementation (None means curses will be used) + mode: Profiling mode ('cpu', 'gil', etc.) - affects what stats are shown + """ + self.result = collections.defaultdict( + lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) + ) + self.sample_interval_usec = sample_interval_usec + self.sample_interval_sec = ( + sample_interval_usec / MICROSECONDS_PER_SECOND + ) + self.skip_idle = skip_idle + self.sort_by = sort_by + self.limit = limit + self.total_samples = 0 + self.start_time = None + self.stdscr = None + self.display = display # DisplayInterface implementation + self.running = True + self.pid = pid + self.mode = mode # Profiling mode + self._saved_stdout = None + self._saved_stderr = None + self._devnull = None + self._last_display_update = None + self.max_sample_rate = 0 # Track maximum sample rate seen + self.successful_samples = 0 # Track samples that captured frames + self.failed_samples = 0 # Track samples that failed to capture frames + self.display_update_interval = DISPLAY_UPDATE_INTERVAL # Instance variable for display refresh rate + + # Thread status statistics (bit flags) + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, # Total thread count across all samples + } + self.gc_frame_samples = 0 # Track samples with GC frames + + # Interactive controls state + self.paused = False # Pause UI updates (profiling continues) + self.show_help = False # Show help screen + self.filter_pattern = None # Glob pattern to filter functions + self.filter_input_mode = False # Currently entering filter text + self.filter_input_buffer = "" # Buffer for filter input + self.finished = False # Program 
@property
def elapsed_time(self):
    """Return the seconds elapsed since sampling started.

    The value is frozen at ``finish_timestamp`` once the session is marked
    finished.  It is 0 while no start time has been recorded, including
    when the session finishes before any sample set ``start_time``.
    """
    # Without a start time there is nothing to subtract from.  Checking
    # this first avoids ``finish_timestamp - None`` in the finished state.
    if self.start_time is None:
        return 0
    if self.finished and self.finish_timestamp is not None:
        return self.finish_timestamp - self.start_time
    return time.perf_counter() - self.start_time
def simplify_path(self, filepath):
    """Return *filepath* with the first matching known prefix removed.

    Prefixes are tried in the order stored in ``self._path_prefixes``.  A
    prefix only matches when it ends on a path-component boundary, so a
    prefix such as ``/usr/lib/python3.14`` does not mangle
    ``/usr/lib/python3.14-extras/mod.py``.

    Args:
        filepath: Path to shorten.

    Returns:
        The path relative to the matched prefix (leading separators
        removed), or *filepath* unchanged when no prefix matches.
    """
    for prefix_path in self._path_prefixes:
        if not filepath.startswith(prefix_path):
            continue

        remainder = filepath[len(prefix_path):]
        # Accept the match only if the prefix is the whole path, already
        # ends with a separator, or is followed by one.
        on_boundary = (
            not remainder
            or prefix_path.endswith(os.sep)
            or remainder.startswith(os.sep)
        )
        if on_boundary:
            return remainder.lstrip(os.sep)

    # If no match, return the original path
    return filepath
def collect(self, stack_frames):
    """Fold one sample into the statistics and refresh the UI when due.

    Data is always collected, even while the display is paused; pausing
    only suppresses redraws.

    Args:
        stack_frames: Iterable of per-interpreter records.  Each record's
            ``threads`` attribute (default: empty) yields thread records
            whose ``status``, ``thread_id`` and ``frame_info`` attributes
            are read with defaults of 0, None and None respectively.
    """
    if self.start_time is None:
        self.start_time = time.perf_counter()
        self._last_display_update = self.start_time

    # Thread status counts for this sample
    temp_status_counts = {
        "has_gil": 0,
        "on_cpu": 0,
        "gil_requested": 0,
        "unknown": 0,
        "total": 0,
    }
    has_gc_frame = False

    # Track thread status flags and GC frames
    for interpreter_info in stack_frames:
        threads = getattr(interpreter_info, "threads", [])
        for thread_info in threads:
            temp_status_counts["total"] += 1

            # Track thread status using bit flags
            status_flags = getattr(thread_info, "status", 0)
            thread_id = getattr(thread_info, "thread_id", None)

            # Update aggregated counts
            if status_flags & THREAD_STATUS_HAS_GIL:
                temp_status_counts["has_gil"] += 1
            if status_flags & THREAD_STATUS_ON_CPU:
                temp_status_counts["on_cpu"] += 1
            if status_flags & THREAD_STATUS_GIL_REQUESTED:
                temp_status_counts["gil_requested"] += 1
            if status_flags & THREAD_STATUS_UNKNOWN:
                temp_status_counts["unknown"] += 1

            # Update per-thread status counts
            if thread_id is not None:
                thread_data = self._get_or_create_thread_data(thread_id)
                thread_data.increment_status_flag(status_flags)

            # With skip_idle, a thread that neither holds the GIL nor is
            # on the CPU contributes status counts but no frames.
            if self.skip_idle:
                has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
                on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
                if not (has_gil or on_cpu):
                    continue

            frames = getattr(thread_info, "frame_info", None)
            if not frames:
                continue

            self.process_frames(frames, thread_id=thread_id)

            # Track thread IDs only for threads that actually have samples
            if thread_id is not None and thread_id not in self.thread_ids:
                self.thread_ids.append(thread_id)

            # A sample is a GC sample when any frame is the "<GC>" marker
            # frame or a gc_collect function.  The marker must not be the
            # empty string: "" is a substring of every name, which would
            # flag every sample.
            # NOTE(review): the marker is restored from upstream CPython,
            # not from this file -- confirm it is "<GC>".
            thread_has_gc_frame = False
            for frame in frames:
                funcname = getattr(frame, "funcname", "")
                if "<GC>" in funcname or "gc_collect" in funcname:
                    has_gc_frame = True
                    thread_has_gc_frame = True
                    break

            # Increment per-thread sample count
            if thread_id is not None:
                thread_data = self._get_or_create_thread_data(thread_id)
                thread_data.sample_count += 1
                if thread_has_gc_frame:
                    thread_data.gc_frame_samples += 1

    # Update cumulative thread status counts
    for key, count in temp_status_counts.items():
        self.thread_status_counts[key] += count

    if has_gc_frame:
        self.gc_frame_samples += 1

    self.successful_samples += 1
    self.total_samples += 1

    # Handle input on every sample for instant responsiveness
    if self.display is not None:
        self._handle_input()

    # Update display at configured rate if display is initialized and not paused
    if self.display is not None and not self.paused:
        current_time = time.perf_counter()
        if (
            self._last_display_update is None
            or (current_time - self._last_display_update)
            >= self.display_update_interval
        ):
            self._update_display()
            self._last_display_update = current_time
rendering.""" + elapsed = self.elapsed_time + stats_list = self.build_stats_list() + + # Calculate available space for stats + # Add extra lines for finished banner when in finished state + extra_header_lines = ( + FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + ) + max_stats_lines = max( + 0, + height + - HEADER_LINES + - extra_header_lines + - FOOTER_LINES + - SAFETY_MARGIN, + ) + stats_list = stats_list[:max_stats_lines] + + return elapsed, stats_list + + def _initialize_widgets(self, colors): + """Initialize widgets with display and colors.""" + if self.header_widget is None: + # Initialize trend tracker with colors + if self._trend_tracker is None: + self._trend_tracker = TrendTracker(colors, enabled=True) + + self.header_widget = HeaderWidget(self.display, colors, self) + self.table_widget = TableWidget(self.display, colors, self) + self.footer_widget = FooterWidget(self.display, colors, self) + self.help_widget = HelpWidget(self.display, colors) + + def _render_display_sections( + self, height, width, elapsed, stats_list, colors + ): + """Render all display sections to the screen.""" + line = 0 + try: + # Initialize widgets if not already done + self._initialize_widgets(colors) + + # Render header + line = self.header_widget.render( + line, width, elapsed=elapsed, stats_list=stats_list + ) + + # Render table + line = self.table_widget.render( + line, width, height=height, stats_list=stats_list + ) + + except curses.error: + pass + + def _update_display(self): + """Update the display with current stats.""" + try: + # Clear screen and get dimensions + self.display.clear() + height, width = self.display.get_dimensions() + + # Check terminal size + if width < MIN_TERMINAL_WIDTH or height < MIN_TERMINAL_HEIGHT: + self._show_terminal_too_small(height, width) + self.display.refresh() + return + + # Setup colors and initialize widgets (needed for both help and normal display) + colors = self._setup_colors() + self._initialize_widgets(colors) + + # Show help 
screen if requested + if self.show_help: + self.help_widget.render(0, width, height=height) + self.display.refresh() + return + + # Prepare data + elapsed, stats_list = self._prepare_display_data(height) + + # Render all sections + self._render_display_sections( + height, width, elapsed, stats_list, colors + ) + + # Footer + self.footer_widget.render(height - 2, width) + + # Show filter input prompt if in filter input mode + if self.filter_input_mode: + self.footer_widget.render_filter_input_prompt( + height - 1, width + ) + + # Refresh display + self.display.redraw() + self.display.refresh() + + except Exception: + pass + + def _cycle_sort(self, reverse=False): + """Cycle through different sort modes in column order. + + Args: + reverse: If True, cycle backwards (right to left), otherwise forward (left to right) + """ + sort_modes = [ + "nsamples", + "sample_pct", + "tottime", + "cumul_pct", + "cumtime", + ] + try: + current_idx = sort_modes.index(self.sort_by) + if reverse: + self.sort_by = sort_modes[(current_idx - 1) % len(sort_modes)] + else: + self.sort_by = sort_modes[(current_idx + 1) % len(sort_modes)] + except ValueError: + self.sort_by = "nsamples" + + def _setup_colors(self): + """Set up color pairs and return color attributes.""" + + A_BOLD = self.display.get_attr("A_BOLD") + A_REVERSE = self.display.get_attr("A_REVERSE") + A_UNDERLINE = self.display.get_attr("A_UNDERLINE") + A_NORMAL = self.display.get_attr("A_NORMAL") + + # Check both curses color support and _colorize.can_colorize() + if self.display.has_colors() and self._can_colorize: + with contextlib.suppress(Exception): + # Color constants (using curses values for compatibility) + COLOR_CYAN = 6 + COLOR_GREEN = 2 + COLOR_YELLOW = 3 + COLOR_BLACK = 0 + COLOR_MAGENTA = 5 + COLOR_RED = 1 + + # Initialize all color pairs used throughout the UI + self.display.init_color_pair( + 1, COLOR_CYAN, -1 + ) # Data colors for stats rows + self.display.init_color_pair(2, COLOR_GREEN, -1) + 
self.display.init_color_pair(3, COLOR_YELLOW, -1) + self.display.init_color_pair( + COLOR_PAIR_HEADER_BG, COLOR_BLACK, COLOR_GREEN + ) + self.display.init_color_pair( + COLOR_PAIR_CYAN, COLOR_CYAN, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_YELLOW, COLOR_YELLOW, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_GREEN, COLOR_GREEN, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_MAGENTA, COLOR_MAGENTA, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_RED, COLOR_RED, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_SORTED_HEADER, COLOR_BLACK, COLOR_YELLOW + ) + + return { + "header": self.display.get_color_pair(COLOR_PAIR_HEADER_BG) + | A_BOLD, + "cyan": self.display.get_color_pair(COLOR_PAIR_CYAN) + | A_BOLD, + "yellow": self.display.get_color_pair(COLOR_PAIR_YELLOW) + | A_BOLD, + "green": self.display.get_color_pair(COLOR_PAIR_GREEN) + | A_BOLD, + "magenta": self.display.get_color_pair(COLOR_PAIR_MAGENTA) + | A_BOLD, + "red": self.display.get_color_pair(COLOR_PAIR_RED) + | A_BOLD, + "sorted_header": self.display.get_color_pair( + COLOR_PAIR_SORTED_HEADER + ) + | A_BOLD, + "normal_header": A_REVERSE | A_BOLD, + "color_samples": self.display.get_color_pair(1), + "color_file": self.display.get_color_pair(2), + "color_func": self.display.get_color_pair(3), + # Trend colors (stock-like indicators) + "trend_up": self.display.get_color_pair(COLOR_PAIR_GREEN) | A_BOLD, + "trend_down": self.display.get_color_pair(COLOR_PAIR_RED) | A_BOLD, + "trend_stable": A_NORMAL, + } + + # Fallback to non-color attributes + return { + "header": A_REVERSE | A_BOLD, + "cyan": A_BOLD, + "yellow": A_BOLD, + "green": A_BOLD, + "magenta": A_BOLD, + "red": A_BOLD, + "sorted_header": A_REVERSE | A_BOLD | A_UNDERLINE, + "normal_header": A_REVERSE | A_BOLD, + "color_samples": A_NORMAL, + "color_file": A_NORMAL, + "color_func": A_NORMAL, + # Trend colors (fallback to bold/normal for monochrome) + "trend_up": A_BOLD, + "trend_down": 
def build_stats_list(self):
    """Build the per-function statistics rows, filtered and sorted.

    Rows come from the result dict of the current view.  When
    ``self.filter_pattern`` is set, a row is kept only if the pattern is a
    case-insensitive substring of the filename, the function name, or
    ``"filename:funcname"``.

    Returns:
        A list of dicts with keys ``func``, ``direct_calls``,
        ``cumulative_calls``, ``total_time``, ``cumulative_time`` and
        ``trends``, sorted in descending order by ``self.sort_by``.  An
        unrecognised sort key leaves the rows in source order.
    """
    total_samples = self.total_samples
    interval_sec = self.sample_interval_sec
    tracker = self._trend_tracker

    # Normalise the filter once instead of once per row.
    pattern = self.filter_pattern.lower() if self.filter_pattern else None

    def percent(count):
        # Share of all samples; 0 when nothing has been sampled yet.
        return (count / total_samples * 100) if total_samples > 0 else 0

    stats_list = []
    for func, call_counts in self._get_current_result_source().items():
        if pattern is not None:
            filename, _lineno, funcname = func
            filename_lower = filename.lower()
            funcname_lower = funcname.lower()
            matched = (
                pattern in filename_lower
                or pattern in funcname_lower
                or pattern in f"{filename_lower}:{funcname_lower}"
            )
            if not matched:
                continue

        direct_calls = call_counts.get("direct_calls", 0)
        cumulative_calls = call_counts.get("cumulative_calls", 0)
        total_time = direct_calls * interval_sec
        cumulative_time = cumulative_calls * interval_sec

        # Calculate trends for all columns using TrendTracker
        trends = {}
        if tracker is not None:
            trends = tracker.update_metrics(
                func,
                {
                    'nsamples': direct_calls,
                    'tottime': total_time,
                    'cumtime': cumulative_time,
                    'sample_pct': percent(direct_calls),
                    'cumul_pct': percent(cumulative_calls),
                }
            )

        stats_list.append(
            {
                "func": func,
                "direct_calls": direct_calls,
                "cumulative_calls": cumulative_calls,
                "total_time": total_time,
                "cumulative_time": cumulative_time,
                "trends": trends,  # Dictionary of trends for all columns
            }
        )

    # One key function per sort mode; every mode sorts descending.
    sort_keys = {
        "nsamples": lambda row: row["direct_calls"],
        "tottime": lambda row: row["total_time"],
        "cumtime": lambda row: row["cumulative_time"],
        "sample_pct": lambda row: percent(row["direct_calls"]),
        "cumul_pct": lambda row: percent(row["cumulative_calls"]),
    }
    sort_key = sort_keys.get(self.sort_by)
    if sort_key is not None:
        stats_list.sort(key=sort_key, reverse=True)

    return stats_list
self.display.get_attr("A_BOLD") + msg1 = "Terminal too small!" + msg2 = f"Need: {MIN_TERMINAL_WIDTH}x{MIN_TERMINAL_HEIGHT}" + msg3 = f"Have: {width}x{height}" + msg4 = "Please resize" + + # Center the messages + if height >= 4: + self.display.add_str( + height // 2 - 2, + max(0, (width - len(msg1)) // 2), + msg1[: width - 1], + A_BOLD, + ) + self.display.add_str( + height // 2 - 1, + max(0, (width - len(msg2)) // 2), + msg2[: width - 1], + ) + self.display.add_str( + height // 2, + max(0, (width - len(msg3)) // 2), + msg3[: width - 1], + ) + self.display.add_str( + height // 2 + 1, + max(0, (width - len(msg4)) // 2), + msg4[: width - 1], + ) + elif height >= 1: + self.display.add_str(0, 0, msg1[: width - 1], A_BOLD) + + def _show_terminal_size_warning_and_wait(self, height, width): + """Show terminal size warning during initialization and wait for user acknowledgment.""" + A_BOLD = self.display.get_attr("A_BOLD") + A_DIM = self.display.get_attr("A_DIM") + + self.display.clear() + msg1 = "WARNING: Terminal too small!" + msg2 = f"Required: {MIN_TERMINAL_WIDTH}x{MIN_TERMINAL_HEIGHT}" + msg3 = f"Current: {width}x{height}" + msg4 = "Please resize your terminal for best experience" + msg5 = "Press any key to continue..." 
def _handle_input(self):
    """Poll the keyboard once without blocking and apply the pressed key.

    While filter-input mode is active every key edits the filter buffer
    and no command is executed:

        ESC         leave filter editing and discard the buffer
        Enter       apply the buffer as the filter (empty buffer -> None)
        Backspace   delete the last buffered character
        printable   append the character (ASCII 32..126)

    Otherwise the key is interpreted as a command:

        h / H / ?   toggle help; while help is shown any other key hides it
        q / Q       stop the collector (``running = False``)
        s / S       ``_cycle_sort(reverse=False)`` / ``(reverse=True)``
        p / P       toggle ``paused``
        r / R       reset statistics, unless profiling has finished
        + / =       shorten the refresh interval by 0.05s (floor 0.05s)
        - / _       lengthen the refresh interval by 0.05s (ceiling 1.0s)
        c / C       clear the active filter pattern
        /           start editing the filter, seeded with the current one
        t / T       toggle between the "ALL" and "PER_THREAD" views
        x / X       toggle the trend tracker, when one is attached
        LEFT / UP   previous thread (enters PER_THREAD from ALL)
        RIGHT/DOWN  next thread (enters PER_THREAD from ALL)

    In both modes ``_handle_finished_input_update`` is finally told whether
    a key was actually read (``get_input`` returns -1 when none is pending).
    """
    # Backspace arrives as ASCII BS (8) or DEL (127) depending on the
    # terminal, or as 263 (curses.KEY_BACKSPACE) in keypad mode.
    backspace_keys = (8, 127, 263)

    self.display.set_nodelay(True)
    ch = self.display.get_input()
    had_input = ch != -1

    # Filter editing takes precedence over every command binding.
    if self.filter_input_mode:
        if ch == 27:  # ESC: cancel editing
            self.filter_input_mode = False
            self.filter_input_buffer = ""
        elif ch == 10 or ch == 13:  # Enter: commit the buffer
            self.filter_pattern = (
                self.filter_input_buffer
                if self.filter_input_buffer
                else None
            )
            self.filter_input_mode = False
            self.filter_input_buffer = ""
        elif ch in backspace_keys:
            if self.filter_input_buffer:
                self.filter_input_buffer = self.filter_input_buffer[:-1]
        elif 32 <= ch < 127:  # Printable ASCII
            self.filter_input_buffer += chr(ch)

        self._handle_finished_input_update(had_input)
        return

    # Help toggle; any other real key press dismisses an open help screen.
    if ch == ord("h") or ch == ord("H") or ch == ord("?"):
        self.show_help = not self.show_help
    elif self.show_help and had_input:
        self.show_help = False

    # Regular commands. This is a separate chain on purpose: the key that
    # dismissed the help screen is still executed as a command.
    if ch == ord("q") or ch == ord("Q"):
        self.running = False

    elif ch == ord("s"):
        self._cycle_sort(reverse=False)

    elif ch == ord("S"):
        self._cycle_sort(reverse=True)

    elif ch == ord("p") or ch == ord("P"):
        self.paused = not self.paused

    elif ch == ord("r") or ch == ord("R"):
        # Resetting after the run has finished would wipe the final results.
        if not self.finished:
            self.reset_stats()

    elif ch == ord("+") or ch == ord("="):
        # Shorter interval means a faster refresh, capped at 20Hz.
        self.display_update_interval = max(
            0.05, self.display_update_interval - 0.05
        )

    elif ch == ord("-") or ch == ord("_"):
        # Longer interval means a slower refresh, capped at 1Hz.
        self.display_update_interval = min(
            1.0, self.display_update_interval + 0.05
        )

    elif ch == ord("c") or ch == ord("C"):
        if self.filter_pattern:
            self.filter_pattern = None

    elif ch == ord("/"):
        self.filter_input_mode = True
        self.filter_input_buffer = self.filter_pattern or ""

    elif ch == ord("t") or ch == ord("T"):
        if self.view_mode == "ALL":
            # Only enter the per-thread view when a thread is known.
            if len(self.thread_ids) > 0:
                self.view_mode = "PER_THREAD"
                self.current_thread_index = 0
        else:
            self.view_mode = "ALL"

    elif ch == ord("x") or ch == ord("X"):
        if self._trend_tracker is not None:
            self._trend_tracker.toggle()

    elif ch == curses.KEY_LEFT or ch == curses.KEY_UP:
        # Previous thread, wrapping around; from ALL, enter PER_THREAD.
        if len(self.thread_ids) > 0:
            if self.view_mode == "ALL":
                self.view_mode = "PER_THREAD"
                self.current_thread_index = 0
            else:
                self.current_thread_index = (
                    self.current_thread_index - 1
                ) % len(self.thread_ids)

    elif ch == curses.KEY_RIGHT or ch == curses.KEY_DOWN:
        # Next thread, wrapping around; from ALL, enter PER_THREAD.
        if len(self.thread_ids) > 0:
            if self.view_mode == "ALL":
                self.view_mode = "PER_THREAD"
                self.current_thread_index = 0
            else:
                self.current_thread_index = (
                    self.current_thread_index + 1
                ) % len(self.thread_ids)

    self._handle_finished_input_update(had_input)
def export(self, filename):
    """Refuse to export: live mode has no file output.

    Args:
        filename: Requested output path; it is never used.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Export to file is not supported in live mode. "
    hint = "Use the live TUI to view statistics in real-time."
    raise NotImplementedError(reason + hint)
line + +# Color pair IDs +COLOR_PAIR_HEADER_BG = 4 +COLOR_PAIR_CYAN = 5 +COLOR_PAIR_YELLOW = 6 +COLOR_PAIR_GREEN = 7 +COLOR_PAIR_MAGENTA = 8 +COLOR_PAIR_RED = 9 +COLOR_PAIR_SORTED_HEADER = 10 + +# Default display settings +DEFAULT_SORT_BY = "nsamples" # Number of samples in leaf (self time) +DEFAULT_DISPLAY_LIMIT = 20 diff --git a/Lib/profiling/sampling/live_collector/display.py b/Lib/profiling/sampling/live_collector/display.py new file mode 100644 index 00000000000000..d7f65ad73fdc6d --- /dev/null +++ b/Lib/profiling/sampling/live_collector/display.py @@ -0,0 +1,236 @@ +"""Display interface abstractions for the live profiling collector.""" + +import contextlib +import curses +from abc import ABC, abstractmethod + + +class DisplayInterface(ABC): + """Abstract interface for display operations to enable testing.""" + + @abstractmethod + def get_dimensions(self): + """Get terminal dimensions as (height, width).""" + pass + + @abstractmethod + def clear(self): + """Clear the screen.""" + pass + + @abstractmethod + def refresh(self): + """Refresh the screen to show changes.""" + pass + + @abstractmethod + def redraw(self): + """Redraw the entire window.""" + pass + + @abstractmethod + def add_str(self, line, col, text, attr=0): + """Add a string at the specified position.""" + pass + + @abstractmethod + def get_input(self): + """Get a character from input (non-blocking). 
class CursesDisplay(DisplayInterface):
    """DisplayInterface backend that forwards to a real curses window."""

    def __init__(self, stdscr):
        """Wrap *stdscr*, the curses window all operations are sent to."""
        self.stdscr = stdscr

    def get_dimensions(self):
        """Return the window size as reported by ``getmaxyx()``."""
        return self.stdscr.getmaxyx()

    def clear(self):
        """Clear the window."""
        self.stdscr.clear()

    def refresh(self):
        """Refresh the window."""
        self.stdscr.refresh()

    def redraw(self):
        """Mark the whole window for redrawing via ``redrawwin()``."""
        self.stdscr.redrawwin()

    def add_str(self, line, col, text, attr=0):
        """Write *text* at (line, col), clipped to the window.

        Positions outside the window are ignored, the text is cut so the
        last column of the row is never written, and curses errors are
        swallowed.
        """
        try:
            height, width = self.get_dimensions()
            if not (0 <= line < height and 0 <= col < width):
                return
            room = width - col - 1
            clipped = text if len(text) <= room else text[:room]
            self.stdscr.addstr(line, col, clipped, attr)
        except curses.error:
            pass

    def get_input(self):
        """Return the next key code from ``getch()``, or -1 on failure."""
        key = -1
        with contextlib.suppress(KeyError, curses.error):
            key = self.stdscr.getch()
        return key

    def set_nodelay(self, flag):
        """Forward *flag* to the window's ``nodelay()``."""
        self.stdscr.nodelay(flag)

    def has_colors(self):
        """Return ``curses.has_colors()``."""
        return curses.has_colors()

    def init_color_pair(self, pair_id, fg, bg):
        """Define colour pair *pair_id*; curses errors are ignored."""
        with contextlib.suppress(curses.error):
            curses.init_pair(pair_id, fg, bg)

    def get_color_pair(self, pair_id):
        """Return the attribute value for colour pair *pair_id*."""
        return curses.color_pair(pair_id)

    def get_attr(self, name):
        """Look up a curses attribute by name; unknown names give 0."""
        return getattr(curses, name, 0)
def add_str(self, line, col, text, attr=0):
    """Record *text* in the buffer at (line, col).

    Writes outside the mock screen are dropped. The text is clipped so it
    stops one column short of the right edge, and is stored together with
    *attr* under the key ``(line, col)``.
    """
    if not (0 <= line < self.height and 0 <= col < self.width):
        return

    room = self.width - col - 1
    clipped = text if len(text) <= room else text[:room]
    self.buffer[(line, col)] = (clipped, attr)
TrendDirection = Literal["up", "down", "stable"]


class TrendTracker:
    """
    Tracks metric trends over time and provides visual indicators.

    This class encapsulates all logic for:
    - Tracking previous values of metrics
    - Calculating trend directions (up/down/stable)
    - Remembering the most recently calculated direction per metric
    - Determining visual attributes (colors) for trends
    - Managing enable/disable state

    Example:
        tracker = TrendTracker(colors_dict)
        tracker.update("func1", "nsamples", 10)
        trend = tracker.get_trend("func1", "nsamples")
        color = tracker.get_color(trend)
    """

    # Values must differ by more than this to count as a change
    CHANGE_THRESHOLD = 0.001

    def __init__(self, colors: Dict[str, int], enabled: bool = True):
        """
        Initialize the trend tracker.

        Args:
            colors: Dictionary containing color attributes including
                   'trend_up', 'trend_down', 'trend_stable'
            enabled: Whether trend tracking is initially enabled
        """
        # key -> metric -> value seen by the latest update()
        self._previous_values: Dict[Any, Dict[str, float]] = {}
        # key -> metric -> direction computed by the latest update()
        self._last_trends: Dict[Any, Dict[str, TrendDirection]] = {}
        self._enabled = enabled
        self._colors = colors

    @property
    def enabled(self) -> bool:
        """Whether trend tracking is enabled."""
        return self._enabled

    def toggle(self) -> bool:
        """
        Toggle trend tracking on/off.

        Returns:
            New enabled state
        """
        self._enabled = not self._enabled
        return self._enabled

    def set_enabled(self, enabled: bool) -> None:
        """Set trend tracking enabled state."""
        self._enabled = enabled

    def update(self, key: Any, metric: str, value: float) -> TrendDirection:
        """
        Update a metric value and calculate its trend.

        The first value seen for a (key, metric) pair is compared with
        itself, so it is always reported as 'stable'. The computed direction
        is remembered and can be read back with get_trend().

        Args:
            key: Identifier for the entity (e.g., function)
            metric: Name of the metric (e.g., 'nsamples', 'tottime')
            value: Current value of the metric

        Returns:
            Trend direction: 'up', 'down', or 'stable'
        """
        history = self._previous_values.setdefault(key, {})

        # Default to the current value so a new metric starts out stable
        prev_value = history.get(metric, value)

        trend: TrendDirection
        if value > prev_value + self.CHANGE_THRESHOLD:
            trend = "up"
        elif value < prev_value - self.CHANGE_THRESHOLD:
            trend = "down"
        else:
            trend = "stable"

        # Remember both the value and the direction for the next call
        history[metric] = value
        self._last_trends.setdefault(key, {})[metric] = trend

        return trend

    def get_trend(self, key: Any, metric: str) -> TrendDirection:
        """
        Get the current trend for a metric without updating.

        Args:
            key: Identifier for the entity
            metric: Name of the metric

        Returns:
            The direction computed by the latest update() for this metric,
            or 'stable' if it has never been updated (or was cleared)
        """
        return self._last_trends.get(key, {}).get(metric, "stable")

    def get_color(self, trend: TrendDirection) -> int:
        """
        Get the color attribute for a trend direction.

        Args:
            trend: The trend direction

        Returns:
            Curses color attribute (or A_NORMAL if disabled)
        """
        if not self._enabled:
            return curses.A_NORMAL

        if trend == "up":
            return self._colors.get("trend_up", curses.A_BOLD)
        elif trend == "down":
            return self._colors.get("trend_down", curses.A_BOLD)
        else:  # stable
            return self._colors.get("trend_stable", curses.A_NORMAL)

    def update_metrics(self, key: Any, metrics: Dict[str, float]) -> Dict[str, TrendDirection]:
        """
        Update multiple metrics at once and get their trends.

        Args:
            key: Identifier for the entity
            metrics: Dictionary of metric_name -> value

        Returns:
            Dictionary of metric_name -> trend_direction
        """
        return {
            metric: self.update(key, metric, value)
            for metric, value in metrics.items()
        }

    def clear(self) -> None:
        """Clear all tracked values and trends (useful on stats reset)."""
        self._previous_values.clear()
        self._last_trends.clear()

    def __repr__(self) -> str:
        """String representation for debugging."""
        status = "enabled" if self._enabled else "disabled"
        tracked = len(self._previous_values)
        return f"TrendTracker({status}, tracking {tracked} entities)"
def render_bar(
    self, filled, total, max_width, fill_char="█", empty_char="░"
):
    """
    Render a progress bar and return the bar string and its length.

    The bar is ``max_width`` cells wide, wrapped in square brackets. The
    fraction ``filled / total`` is clamped to the range 0..1 before being
    turned into a cell count, so overshoot draws a full bar and negative
    input draws an empty one.

    Args:
        filled: Current filled amount
        total: Total amount (max value); a non-positive total is treated
            as 1 so the division is always defined
        max_width: Number of cells between the brackets (negative values
            give an empty "[]")
        fill_char: Character to use for filled portion
        empty_char: Character to use for empty portion

    Returns:
        Tuple of (bar_string, bar_length)
    """
    bar_width = max(0, max_width)

    # Only substitute the divisor when total is unusable; a fractional
    # total such as 0.5 must still scale the bar.
    denominator = total if total > 0 else 1
    normalized = min(max(filled / denominator, 0.0), 1.0)
    bar_fill = int(normalized * bar_width)

    bar = (
        "["
        + fill_char * bar_fill
        + empty_char * (bar_width - bar_fill)
        + "]"
    )
    return bar, len(bar)
def format_uptime(self, elapsed):
    """Render *elapsed* seconds as ``MmSSs``, or ``HhMMmSSs`` past an hour.

    The value is truncated to whole seconds first. Hours are not capped,
    so a day and one hour reads ``25h00m00s``.
    """
    whole_seconds = int(elapsed)
    total_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    if hours > 0:
        return f"{hours}h{minutes:02d}m{seconds:02d}s"
    return f"{minutes}m{seconds:02d}s"
def format_rate_with_units(self, rate_hz):
    """Format a rate given in Hz with one decimal and a Hz/KHz/MHz suffix.

    The largest unit whose threshold the rate reaches is used: MHz from
    1,000,000 Hz, KHz from 1,000 Hz, plain Hz below that.
    """
    for threshold, suffix in ((1_000_000, "MHz"), (1_000, "KHz")):
        if rate_hz >= threshold:
            return f"{rate_hz / threshold:.1f}{suffix}"
    return f"{rate_hz:.1f}Hz"
target_rate = ( + MICROSECONDS_PER_SECOND / self.collector.sample_interval_usec + ) + + # Show current/target ratio with percentage + if sample_rate > 0 and target_rate > 0: + percentage = min((sample_rate / target_rate) * 100, 100) + current_formatted = self.format_rate_with_units(sample_rate) + target_formatted = self.format_rate_with_units(target_rate) + + if percentage >= 99.5: # Show 100% when very close + rate_label = f" {current_formatted}/{target_formatted} (100%)" + else: + rate_label = f" {current_formatted}/{target_formatted} ({percentage:>4.1f}%)" + else: + target_formatted = self.format_rate_with_units(target_rate) + rate_label = f" target: {target_formatted}" + + available_width = width - col - len(rate_label) - 3 + + if available_width >= MIN_BAR_WIDTH: + bar_width = min(MAX_SAMPLE_RATE_BAR_WIDTH, available_width) + # Use target rate as the reference, with a minimum for scaling + reference_rate = max(target_rate, MIN_SAMPLE_RATE_FOR_SCALING) + normalized_rate = min(sample_rate / reference_rate, 1.0) + bar_fill = int(normalized_rate * bar_width) + + bar = "[" + for i in range(bar_width): + bar += "█" if i < bar_fill else "░" + bar += "]" + self.add_str(line, col, bar, self.colors["green"]) + col += len(bar) + + if col + len(rate_label) < width - 1: + self.add_str(line, col + 1, rate_label, curses.A_DIM) + return line + 1 + + def draw_efficiency_bar(self, line, width): + """Draw sample efficiency bar showing success/failure rates.""" + success_pct = ( + self.collector.successful_samples + / max(1, self.collector.total_samples) + ) * 100 + failed_pct = ( + self.collector.failed_samples + / max(1, self.collector.total_samples) + ) * 100 + + col = 0 + self.add_str(line, col, "Efficiency:", curses.A_BOLD) + col += 11 + + label = f" {success_pct:>5.2f}% good, {failed_pct:>4.2f}% failed" + available_width = width - col - len(label) - 3 + + if available_width >= MIN_BAR_WIDTH: + bar_width = min(MAX_EFFICIENCY_BAR_WIDTH, available_width) + success_fill = int( 
def _add_percentage_stat(
    self, line, col, value, label, color, add_separator=False
):
    """Add a percentage stat to the display.

    Draws an optional " │ " separator, then the value formatted with one
    decimal (right-aligned to at least 4 characters), then "% <label>".

    Args:
        line: Line number
        col: Starting column
        value: Percentage value
        label: Label text
        color: Color attribute for the numeric part
        add_separator: Whether to add separator before the stat

    Returns:
        Updated column position (first column after the label)
    """
    if add_separator:
        separator = " │ "
        self.add_str(line, col, separator, curses.A_DIM)
        col += len(separator)

    value_text = f"{value:>4.1f}"
    self.add_str(line, col, value_text, color)
    # Advance by the rendered width: "100.0" is 5 characters wide, and a
    # fixed step of 4 let the label overwrite its last digit.
    col += len(value_text)

    suffix = f"% {label}"
    self.add_str(line, col, suffix, curses.A_NORMAL)
    col += len(suffix)

    return col
def draw_function_stats(self, line, width, stats_list):
    """Draw the one-line "Functions:" summary and return the next line.

    The total number of functions in the collector's current result
    source is always drawn. The optional sections (functions with at
    least one direct call, functions seen only on the stack, and the
    number of rows in *stats_list*) are each drawn only if enough columns
    remain to the right of the cursor.

    Args:
        line: Line to draw on
        width: Available width
        stats_list: Rows currently shown in the table

    Returns:
        ``line + 1``
    """
    source = self.collector._get_current_result_source()
    n_total = len(source)
    n_shown = len(stats_list)
    n_executing = sum(
        entry.get("direct_calls", 0) > 0 for entry in source.values()
    )
    n_stack_only = n_total - n_executing

    count_width = 5  # Every count occupies a fixed five-column slot

    # Mandatory part: heading plus the total count.
    heading = "Functions: "
    cursor = 0
    self.add_str(line, cursor, heading, curses.A_BOLD)
    cursor += len(heading)
    self.add_str(line, cursor, f"{n_total:>5}", self.colors["cyan"])
    cursor += count_width
    self.add_str(line, cursor, " total", curses.A_NORMAL)
    cursor += len(" total")

    # Optional parts: (columns that must remain, count, color key, caption).
    # Each is tested independently, so a later one can still appear after
    # an earlier one was skipped.
    optional_sections = (
        (25, n_executing, "green", " exec"),
        (25, n_stack_only, "yellow", " stack"),
        (20, n_shown, "magenta", " shown"),
    )
    for margin, count, color_key, caption in optional_sections:
        if cursor >= width - margin:
            continue
        self.add_str(line, cursor, " │ ", curses.A_DIM)
        cursor += 3
        self.add_str(line, cursor, f"{count:>5}", self.colors[color_key])
        cursor += count_width
        self.add_str(line, cursor, caption, curses.A_NORMAL)
        cursor += len(caption)

    return line + 1
+ + if col + len(func_name) < width - 10: + self.add_str(line, col, func_name, medal_colors[displayed]) + col += len(func_name) + + pct_str = ( + f" ({func_pct:.1f}%)" + if func_pct >= 0.1 + else f" ({func_data['direct_calls']})" + ) + self.add_str(line, col, pct_str, curses.A_DIM) + col += len(pct_str) + + displayed += 1 + + if displayed < 3 and col < width - 30: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + + if displayed == 0 and col < width - 25: + self.add_str(line, col, "(collecting samples...)", curses.A_DIM) + + return line + 1 + + def draw_finished_banner(self, line, width): + """Draw a prominent banner when profiling is finished.""" + A_REVERSE = self.display.get_attr("A_REVERSE") + A_BOLD = self.display.get_attr("A_BOLD") + + # Add blank line for separation + line += 1 + + # Create the banner message + message = " ✓ PROFILING COMPLETE - Final Results Below - Press 'q' to Quit " + + # Center the message and fill the width with reverse video + if len(message) < width - 1: + padding_total = width - len(message) - 1 + padding_left = padding_total // 2 + padding_right = padding_total - padding_left + full_message = " " * padding_left + message + " " * padding_right + else: + full_message = message[: width - 1] + + # Draw the banner with reverse video and bold + self.add_str( + line, 0, full_message, A_REVERSE | A_BOLD | self.colors["green"] + ) + line += 1 + + # Add blank line for separation + line += 1 + + return line + + +class TableWidget(Widget): + """Widget for rendering column headers and data rows.""" + + def __init__(self, display, colors, collector): + """ + Initialize table widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing stats + """ + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """ + Render column headers and data rows. 
+ + Args: + line: Starting line number + width: Available width + kwargs: Must contain 'height' and 'stats_list' keys + + Returns: + Next available line number + """ + height = kwargs["height"] + stats_list = kwargs["stats_list"] + + # Draw column headers + line, show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + self.draw_column_headers(line, width) + ) + column_flags = ( + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) + + # Draw data rows + line = self.draw_stats_rows( + line, height, width, stats_list, column_flags + ) + + return line + + def draw_column_headers(self, line, width): + """Draw column headers with sort indicators.""" + col = 0 + + # Determine which columns to show based on width + show_sample_pct = width >= WIDTH_THRESHOLD_SAMPLE_PCT + show_tottime = width >= WIDTH_THRESHOLD_TOTTIME + show_cumul_pct = width >= WIDTH_THRESHOLD_CUMUL_PCT + show_cumtime = width >= WIDTH_THRESHOLD_CUMTIME + + sorted_header = self.colors["sorted_header"] + normal_header = self.colors["normal_header"] + + # Determine which column is sorted + sort_col = { + "nsamples": 0, + "sample_pct": 1, + "tottime": 2, + "cumul_pct": 3, + "cumtime": 4, + }.get(self.collector.sort_by, -1) + + # Column 0: nsamples + attr = sorted_header if sort_col == 0 else normal_header + text = f"{'▼nsamples' if sort_col == 0 else 'nsamples':>13}" + self.add_str(line, col, text, attr) + col += 15 + + # Column 1: sample % + if show_sample_pct: + attr = sorted_header if sort_col == 1 else normal_header + text = f"{'▼%' if sort_col == 1 else '%':>5}" + self.add_str(line, col, text, attr) + col += 7 + + # Column 2: tottime + if show_tottime: + attr = sorted_header if sort_col == 2 else normal_header + text = f"{'▼tottime' if sort_col == 2 else 'tottime':>10}" + self.add_str(line, col, text, attr) + col += 12 + + # Column 3: cumul % + if show_cumul_pct: + attr = sorted_header if sort_col == 3 else normal_header + text = f"{'▼%' if sort_col == 3 else '%':>5}" + 
self.add_str(line, col, text, attr) + col += 7 + + # Column 4: cumtime + if show_cumtime: + attr = sorted_header if sort_col == 4 else normal_header + text = f"{'▼cumtime' if sort_col == 4 else 'cumtime':>10}" + self.add_str(line, col, text, attr) + col += 12 + + # Remaining headers + if col < width - 15: + remaining_space = width - col - 1 + func_width = min( + MAX_FUNC_NAME_WIDTH, + max(MIN_FUNC_NAME_WIDTH, remaining_space // 2), + ) + self.add_str( + line, col, f"{'function':<{func_width}}", normal_header + ) + col += func_width + 2 + + if col < width - 10: + self.add_str(line, col, "file:line", normal_header) + + return ( + line + 1, + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) + + def draw_stats_rows(self, line, height, width, stats_list, column_flags): + """Draw the statistics data rows.""" + show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + column_flags + ) + + # Get color attributes from the colors dict (already initialized) + color_samples = self.colors.get("color_samples", curses.A_NORMAL) + color_file = self.colors.get("color_file", curses.A_NORMAL) + color_func = self.colors.get("color_func", curses.A_NORMAL) + + # Get trend tracker for color decisions + trend_tracker = self.collector._trend_tracker + + for stat in stats_list: + if line >= height - FOOTER_LINES: + break + + func = stat["func"] + direct_calls = stat["direct_calls"] + cumulative_calls = stat["cumulative_calls"] + total_time = stat["total_time"] + cumulative_time = stat["cumulative_time"] + trends = stat.get("trends", {}) + + sample_pct = ( + (direct_calls / self.collector.total_samples * 100) + if self.collector.total_samples > 0 + else 0 + ) + cum_pct = ( + (cumulative_calls / self.collector.total_samples * 100) + if self.collector.total_samples > 0 + else 0 + ) + + # Helper function to get trend color for a specific column + def get_trend_color(column_name): + trend = trends.get(column_name, "stable") + if trend_tracker is not None: + return 
trend_tracker.get_color(trend) + return curses.A_NORMAL + + filename, lineno, funcname = func[0], func[1], func[2] + samples_str = f"{direct_calls}/{cumulative_calls}" + col = 0 + + # Samples column - apply trend color based on nsamples trend + nsamples_color = get_trend_color("nsamples") + self.add_str(line, col, f"{samples_str:>13}", nsamples_color) + col += 15 + + # Sample % column + if show_sample_pct: + sample_pct_color = get_trend_color("sample_pct") + self.add_str(line, col, f"{sample_pct:>5.1f}", sample_pct_color) + col += 7 + + # Total time column + if show_tottime: + tottime_color = get_trend_color("tottime") + self.add_str(line, col, f"{total_time:>10.3f}", tottime_color) + col += 12 + + # Cumul % column + if show_cumul_pct: + cumul_pct_color = get_trend_color("cumul_pct") + self.add_str(line, col, f"{cum_pct:>5.1f}", cumul_pct_color) + col += 7 + + # Cumul time column + if show_cumtime: + cumtime_color = get_trend_color("cumtime") + self.add_str(line, col, f"{cumulative_time:>10.3f}", cumtime_color) + col += 12 + + # Function name column + if col < width - 15: + remaining_space = width - col - 1 + func_width = min( + MAX_FUNC_NAME_WIDTH, + max(MIN_FUNC_NAME_WIDTH, remaining_space // 2), + ) + + func_display = funcname + if len(funcname) > func_width: + func_display = funcname[: func_width - 3] + "..." + func_display = f"{func_display:<{func_width}}" + self.add_str(line, col, func_display, color_func) + col += func_width + 2 + + # File:line column + if col < width - 10: + simplified_path = self.collector.simplify_path(filename) + file_line = f"{simplified_path}:{lineno}" + remaining_width = width - col - 1 + self.add_str( + line, col, file_line[:remaining_width], color_file + ) + + line += 1 + + return line + + +class FooterWidget(Widget): + """Widget for rendering the footer section (legend and controls).""" + + def __init__(self, display, colors, collector): + """ + Initialize footer widget. 
+ + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing state + """ + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """ + Render the footer at the specified position. + + Args: + line: Starting line number (should be height - 2) + width: Available width + + Returns: + Next available line number + """ + A_DIM = self.display.get_attr("A_DIM") + A_BOLD = self.display.get_attr("A_BOLD") + + # Legend line + legend = "nsamples: direct/cumulative (direct=executing, cumulative=on stack)" + self.add_str(line, 0, legend[: width - 1], A_DIM) + line += 1 + + # Controls line with status + sort_names = { + "tottime": "Total Time", + "nsamples": "Direct Samples", + "cumtime": "Cumulative Time", + "sample_pct": "Sample %", + "cumul_pct": "Cumulative %", + } + sort_display = sort_names.get( + self.collector.sort_by, self.collector.sort_by + ) + + # Build status indicators + status = [] + if self.collector.finished: + status.append("[PROFILING FINISHED - Press 'q' to quit]") + elif self.collector.paused: + status.append("[PAUSED]") + if self.collector.filter_pattern: + status.append( + f"[Filter: {self.collector.filter_pattern} (c to clear)]" + ) + # Show trend colors status if disabled + if self.collector._trend_tracker is not None and not self.collector._trend_tracker.enabled: + status.append("[Trend colors: OFF]") + status_str = " ".join(status) + " " if status else "" + + if self.collector.finished: + footer = f"{status_str}" + else: + footer = f"{status_str}Sort: {sort_display} | 't':mode 'x':trends ←→:thread 'h':help 'q':quit" + self.add_str( + line, + 0, + footer[: width - 1], + A_BOLD + if (self.collector.paused or self.collector.finished) + else A_DIM, + ) + + return line + 1 + + def render_filter_input_prompt(self, line, width): + """Draw the filter input prompt at the bottom of the screen.""" + A_BOLD = 
self.display.get_attr("A_BOLD") + A_REVERSE = self.display.get_attr("A_REVERSE") + + # Draw prompt on last line + prompt = f"Function filter: {self.collector.filter_input_buffer}_" + self.add_str(line, 0, prompt[: width - 1], A_REVERSE | A_BOLD) + + +class HelpWidget(Widget): + """Widget for rendering the help screen overlay.""" + + def render(self, line, width, **kwargs): + """ + Render the help screen. + + Args: + line: Starting line number (ignored, help is centered) + width: Available width + kwargs: Must contain 'height' key + + Returns: + Next available line number (not used for overlays) + """ + height = kwargs["height"] + A_BOLD = self.display.get_attr("A_BOLD") + A_NORMAL = self.display.get_attr("A_NORMAL") + + help_lines = [ + ("Tachyon Profiler - Interactive Commands", A_BOLD), + ("", A_NORMAL), + ("Navigation & Display:", A_BOLD), + (" s - Cycle through sort modes (forward)", A_NORMAL), + (" S - Cycle through sort modes (backward)", A_NORMAL), + (" t - Toggle view mode (ALL / per-thread)", A_NORMAL), + (" x - Toggle trend colors (on/off)", A_NORMAL), + (" ← → ↑ ↓ - Navigate threads (in per-thread mode)", A_NORMAL), + (" + - Faster display refresh rate", A_NORMAL), + (" - - Slower display refresh rate", A_NORMAL), + ("", A_NORMAL), + ("Control:", A_BOLD), + (" p - Freeze display (snapshot)", A_NORMAL), + (" r - Reset all statistics", A_NORMAL), + ("", A_NORMAL), + ("Filtering:", A_BOLD), + (" / - Enter function filter (substring)", A_NORMAL), + (" c - Clear filter", A_NORMAL), + (" ESC - Cancel filter input", A_NORMAL), + ("", A_NORMAL), + ("Other:", A_BOLD), + (" h or ? 
- Show/hide this help", A_NORMAL), + (" q - Quit profiler", A_NORMAL), + ("", A_NORMAL), + ("Press any key to close this help screen", A_BOLD), + ] + + start_line = (height - len(help_lines)) // 2 + for i, (text, attr) in enumerate(help_lines): + if start_line + i < height - 1: + col = 2 # Left-align with small margin + self.add_str(start_line + i, col, text[: width - 3], attr) + + return line # Not used for overlays diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index e06dbf40aa1d89..b8b37a10c43ad3 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -1,6 +1,7 @@ import collections import marshal +from _colorize import ANSIColors from .collector import Collector @@ -70,3 +71,342 @@ def create_stats(self): cumulative, callers, ) + + def print_stats(self, sort=-1, limit=None, show_summary=True, mode=None): + """Print formatted statistics to stdout.""" + import pstats + from .constants import PROFILING_MODE_CPU + + # Create stats object + stats = pstats.SampledStats(self).strip_dirs() + if not stats.stats: + print("No samples were collected.") + if mode == PROFILING_MODE_CPU: + print("This can happen in CPU mode when all threads are idle.") + return + + # Get the stats data + stats_list = [] + for func, ( + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats.stats.items(): + stats_list.append( + ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) + ) + + # Calculate total samples for percentage calculations (using direct_calls) + total_samples = sum( + direct_calls for _, direct_calls, _, _, _, _ in stats_list + ) + + # Sort based on the requested field + sort_field = sort + if sort_field == -1: # stdname + stats_list.sort(key=lambda x: str(x[0])) + elif sort_field == 0: # nsamples (direct samples) + stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls + elif 
sort_field == 1: # tottime + stats_list.sort(key=lambda x: x[3], reverse=True) # total_time + elif sort_field == 2: # cumtime + stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time + elif sort_field == 3: # sample% + stats_list.sort( + key=lambda x: (x[1] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # direct_calls percentage + ) + elif sort_field == 4: # cumul% + stats_list.sort( + key=lambda x: (x[2] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # cumulative_calls percentage + ) + elif sort_field == 5: # nsamples (cumulative samples) + stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls + + # Apply limit if specified + if limit is not None: + stats_list = stats_list[:limit] + + # Determine the best unit for time columns based on maximum values + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in stats_list), default=0 + ) + max_cumulative_time = max( + (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), + default=0, + ) + + total_time_unit, total_time_scale = self._determine_best_unit(max_total_time) + cumulative_time_unit, cumulative_time_scale = self._determine_best_unit( + max_cumulative_time + ) + + # Define column widths for consistent alignment + col_widths = { + "nsamples": 15, # "nsamples" column (inline/cumulative format) + "sample_pct": 8, # "sample%" column + "tottime": max(12, len(f"tottime ({total_time_unit})")), + "cum_pct": 8, # "cumul%" column + "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), + } + + # Print header with colors and proper alignment + print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") + + header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" + header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" + header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime 
({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" + header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" + header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" + header_filename = ( + f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" + ) + + print( + f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" + ) + + # Print each line with proper alignment + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Calculate percentages + sample_pct = ( + (direct_calls / total_samples * 100) if total_samples > 0 else 0 + ) + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + + # Format values with proper alignment - always use A/B format + nsamples_str = f"{direct_calls}/{cumulative_calls}" + nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" + sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" + tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" + cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" + cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" + + # Format the function name with colors + func_name = ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + # Print the formatted line with consistent spacing + print( + f"{nsamples_str} {sample_pct_str} {tottime} {cum_pct_str} {cumtime} {func_name}" + ) + + # Print legend + print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") + print( + f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct executing / on call stack)" + ) + print( + f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total 
samples this function was directly executing" + ) + print( + f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" + ) + print( + f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" + ) + print( + f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" + ) + print( + f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and name" + ) + + # Print summary of interesting functions if enabled + if show_summary and stats_list: + self._print_summary(stats_list, total_samples) + + @staticmethod + def _determine_best_unit(max_value): + """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" + if max_value >= 1.0: + return "s", 1.0 + elif max_value >= 0.001: + return "ms", 1000.0 + else: + return "μs", 1000000.0 + + def _print_summary(self, stats_list, total_samples): + """Print summary of interesting functions.""" + print( + f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" + ) + + # Aggregate stats by fully qualified function name (ignoring line numbers) + func_aggregated = {} + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Use filename:function_name as the key to get fully qualified name + qualified_name = f"{func[0]}:{func[2]}" + if qualified_name not in func_aggregated: + func_aggregated[qualified_name] = [ + 0, + 0, + 0, + 0, + ] # direct_calls, cumulative_calls, total_time, cumulative_time + func_aggregated[qualified_name][0] += direct_calls + func_aggregated[qualified_name][1] += cumulative_calls + func_aggregated[qualified_name][2] += total_time + func_aggregated[qualified_name][3] += cumulative_time + + # Convert aggregated data back to list format for processing + aggregated_stats = [] + for qualified_name, ( + prim_calls, + total_calls, + 
total_time, + cumulative_time, + ) in func_aggregated.items(): + # Parse the qualified name back to filename and function name + if ":" in qualified_name: + filename, func_name = qualified_name.rsplit(":", 1) + else: + filename, func_name = "", qualified_name + # Create a dummy func tuple with filename and function name for display + dummy_func = (filename, "", func_name) + aggregated_stats.append( + ( + dummy_func, + prim_calls, + total_calls, + total_time, + cumulative_time, + {}, + ) + ) + + # Determine best units for summary metrics + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in aggregated_stats), + default=0, + ) + max_cumulative_time = max( + ( + cumulative_time + for _, _, _, _, cumulative_time, _ in aggregated_stats + ), + default=0, + ) + + total_unit, total_scale = self._determine_best_unit(max_total_time) + cumulative_unit, cumulative_scale = self._determine_best_unit( + max_cumulative_time + ) + + def _format_func_name(func): + """Format function name with colors.""" + return ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + def _print_top_functions(stats_list, title, key_func, format_line, n=3): + """Print top N functions sorted by key_func with formatted output.""" + print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") + sorted_stats = sorted(stats_list, key=key_func, reverse=True) + for stat in sorted_stats[:n]: + if line := format_line(stat): + print(f" {line}") + + # Functions with highest direct/cumulative ratio (hot spots) + def format_hotspots(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > 0: + ratio = direct_calls / cumulative_calls + direct_pct = ( + (direct_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{ratio:.3f} direct/cumulative ratio, " + f"{direct_pct:.1f}% direct samples: 
{_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Direct/Cumulative Ratio (Hot Spots)", + key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, + format_line=format_hotspots, + ) + + # Functions with highest call frequency (cumulative/direct difference) + def format_call_frequency(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if cumulative_calls > direct_calls: + call_frequency = cumulative_calls - direct_calls + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{call_frequency:d} indirect calls, " + f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Frequency (Indirect Calls)", + key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) + format_line=format_call_frequency, + ) + + # Functions with highest cumulative-to-direct multiplier (call magnification) + def format_call_magnification(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > direct_calls: + multiplier = cumulative_calls / direct_calls + indirect_calls = cumulative_calls - direct_calls + return ( + f"{multiplier:.1f}x call magnification, " + f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Magnification (Cumulative/Direct)", + key_func=lambda x: (x[2] / x[1]) + if x[1] > 0 + else 0, # Sort by cumulative/direct ratio + format_line=format_call_magnification, + ) diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 713931a639dccb..f3fa441a35f420 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -1,9 +1,6 @@ -import argparse import _remote_debugging import os import pstats 
-import socket -import subprocess import statistics import sys import sysconfig @@ -14,126 +11,19 @@ from .pstats_collector import PstatsCollector from .stack_collector import CollapsedStackCollector, FlamegraphCollector from .gecko_collector import GeckoCollector +from .constants import ( + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_ALL, +) +try: + from .live_collector import LiveStatsCollector +except ImportError: + LiveStatsCollector = None _FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None -# Profiling mode constants -PROFILING_MODE_WALL = 0 -PROFILING_MODE_CPU = 1 -PROFILING_MODE_GIL = 2 -PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks - - -def _parse_mode(mode_string): - """Convert mode string to mode constant.""" - mode_map = { - "wall": PROFILING_MODE_WALL, - "cpu": PROFILING_MODE_CPU, - "gil": PROFILING_MODE_GIL, - } - return mode_map[mode_string] -_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. -Supports the following target modes: - - -p PID: Profile an existing process by PID - - -m MODULE [ARGS...]: Profile a module as python -m module ... 
- - filename [ARGS...]: Profile the specified script by running it in a subprocess - -Supports the following output formats: - - --pstats: Detailed profiling statistics with sorting options - - --collapsed: Stack traces for generating flamegraphs - - --flamegraph Interactive HTML flamegraph visualization (requires web browser) - -Examples: - # Profile process 1234 for 10 seconds with default settings - python -m profiling.sampling -p 1234 - - # Profile a script by running it in a subprocess - python -m profiling.sampling myscript.py arg1 arg2 - - # Profile a module by running it as python -m module in a subprocess - python -m profiling.sampling -m mymodule arg1 arg2 - - # Profile with custom interval and duration, save to file - python -m profiling.sampling -i 50 -d 30 -o profile.stats -p 1234 - - # Generate collapsed stacks for flamegraph - python -m profiling.sampling --collapsed -p 1234 - - # Generate a HTML flamegraph - python -m profiling.sampling --flamegraph -p 1234 - - # Profile all threads, sort by total time - python -m profiling.sampling -a --sort-tottime -p 1234 - - # Profile for 1 minute with 1ms sampling interval - python -m profiling.sampling -i 1000 -d 60 -p 1234 - - # Show only top 20 functions sorted by direct samples - python -m profiling.sampling --sort-nsamples -l 20 -p 1234 - - # Profile all threads and save collapsed stacks - python -m profiling.sampling -a --collapsed -o stacks.txt -p 1234 - - # Profile with real-time sampling statistics - python -m profiling.sampling --realtime-stats -p 1234 - - # Sort by sample percentage to find most sampled functions - python -m profiling.sampling --sort-sample-pct -p 1234 - - # Sort by cumulative samples to find functions most on call stack - python -m profiling.sampling --sort-nsamples-cumul -p 1234""" - - -# Constants for socket synchronization -_SYNC_TIMEOUT = 5.0 -_PROCESS_KILL_TIMEOUT = 2.0 -_READY_MESSAGE = b"ready" -_RECV_BUFFER_SIZE = 1024 - - -def _run_with_sync(original_cmd): - """Run a 
command with socket-based synchronization and return the process.""" - # Create a TCP socket for synchronization with better socket options - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock: - # Set SO_REUSEADDR to avoid "Address already in use" errors - sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port - sync_port = sync_sock.getsockname()[1] - sync_sock.listen(1) - sync_sock.settimeout(_SYNC_TIMEOUT) - - # Get current working directory to preserve it - cwd = os.getcwd() - - # Build command using the sync coordinator - target_args = original_cmd[1:] # Remove python executable - cmd = (sys.executable, "-m", "profiling.sampling._sync_coordinator", str(sync_port), cwd) + tuple(target_args) - - # Start the process with coordinator - process = subprocess.Popen(cmd) - - try: - # Wait for ready signal with timeout - with sync_sock.accept()[0] as conn: - ready_signal = conn.recv(_RECV_BUFFER_SIZE) - - if ready_signal != _READY_MESSAGE: - raise RuntimeError(f"Invalid ready signal received: {ready_signal!r}") - - except socket.timeout: - # If we timeout, kill the process and raise an error - if process.poll() is None: - process.terminate() - try: - process.wait(timeout=_PROCESS_KILL_TIMEOUT) - except subprocess.TimeoutExpired: - process.kill() - process.wait() - raise RuntimeError("Process failed to signal readiness within timeout") - - return process - - class SampleProfiler: @@ -168,6 +58,10 @@ def sample(self, collector, duration_sec=10): last_realtime_update = start_time while running_time < duration_sec: + # Check if live collector wants to stop + if hasattr(collector, 'running') and not collector.running: + break + current_time = time.perf_counter() if next_time < current_time: try: @@ -177,6 +71,7 @@ def sample(self, collector, duration_sec=10): duration_sec = current_time - start_time break except (RuntimeError, UnicodeDecodeError, MemoryError, OSError): + 
collector.collect_failed_sample() errors += 1 except Exception as e: if not self._is_process_running(): @@ -213,16 +108,19 @@ def sample(self, collector, duration_sec=10): sample_rate = num_samples / running_time error_rate = (errors / num_samples) * 100 if num_samples > 0 else 0 - print(f"Captured {num_samples} samples in {running_time:.2f} seconds") - print(f"Sample rate: {sample_rate:.2f} samples/sec") - print(f"Error rate: {error_rate:.2f}%") + # Don't print stats for live mode (curses is handling display) + is_live_mode = LiveStatsCollector is not None and isinstance(collector, LiveStatsCollector) + if not is_live_mode: + print(f"Captured {num_samples} samples in {running_time:.2f} seconds") + print(f"Sample rate: {sample_rate:.2f} samples/sec") + print(f"Error rate: {error_rate:.2f}%") # Pass stats to flamegraph collector if it's the right type if hasattr(collector, 'set_stats'): collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate) expected_samples = int(duration_sec / sample_interval_sec) - if num_samples < expected_samples: + if num_samples < expected_samples and not is_live_mode: print( f"Warning: missed {expected_samples - num_samples} samples " f"from the expected total of {expected_samples} " @@ -277,700 +175,126 @@ def _print_realtime_stats(self): ) -def _determine_best_unit(max_value): - """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" - if max_value >= 1.0: - return "s", 1.0 - elif max_value >= 0.001: - return "ms", 1000.0 - else: - return "μs", 1000000.0 - - -def print_sampled_stats( - stats, sort=-1, limit=None, show_summary=True, sample_interval_usec=100 -): - # Get the stats data - stats_list = [] - for func, ( - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats.stats.items(): - stats_list.append( - ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) - ) - - # Calculate total samples for percentage 
calculations (using direct_calls) - total_samples = sum( - direct_calls for _, direct_calls, _, _, _, _ in stats_list - ) - - # Sort based on the requested field - sort_field = sort - if sort_field == -1: # stdname - stats_list.sort(key=lambda x: str(x[0])) - elif sort_field == 0: # nsamples (direct samples) - stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls - elif sort_field == 1: # tottime - stats_list.sort(key=lambda x: x[3], reverse=True) # total_time - elif sort_field == 2: # cumtime - stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time - elif sort_field == 3: # sample% - stats_list.sort( - key=lambda x: (x[1] / total_samples * 100) - if total_samples > 0 - else 0, - reverse=True, # direct_calls percentage - ) - elif sort_field == 4: # cumul% - stats_list.sort( - key=lambda x: (x[2] / total_samples * 100) - if total_samples > 0 - else 0, - reverse=True, # cumulative_calls percentage - ) - elif sort_field == 5: # nsamples (cumulative samples) - stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls - - # Apply limit if specified - if limit is not None: - stats_list = stats_list[:limit] - - # Determine the best unit for time columns based on maximum values - max_total_time = max( - (total_time for _, _, _, total_time, _, _ in stats_list), default=0 - ) - max_cumulative_time = max( - (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), - default=0, - ) - - total_time_unit, total_time_scale = _determine_best_unit(max_total_time) - cumulative_time_unit, cumulative_time_scale = _determine_best_unit( - max_cumulative_time - ) - - # Define column widths for consistent alignment - col_widths = { - "nsamples": 15, # "nsamples" column (inline/cumulative format) - "sample_pct": 8, # "sample%" column - "tottime": max(12, len(f"tottime ({total_time_unit})")), - "cum_pct": 8, # "cumul%" column - "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), - } - - # Print header with colors and proper alignment - 
print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") - - header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" - header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" - header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime ({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" - header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" - header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" - header_filename = ( - f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" - ) - - print( - f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" - ) - - # Print each line with proper alignment - for ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats_list: - # Calculate percentages - sample_pct = ( - (direct_calls / total_samples * 100) if total_samples > 0 else 0 - ) - cum_pct = ( - (cumulative_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - - # Format values with proper alignment - always use A/B format - nsamples_str = f"{direct_calls}/{cumulative_calls}" - nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" - sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" - tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" - cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" - cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" - - # Format the function name with colors - func_name = ( - f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" - f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" - f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" - ) - - # Print the formatted line with consistent spacing - print( - f"{nsamples_str} {sample_pct_str} {tottime} 
{cum_pct_str} {cumtime} {func_name}" - ) - - # Print legend - print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") - print( - f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct executing / on call stack)" - ) - print( - f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total samples this function was directly executing" - ) - print( - f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" - ) - print( - f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" - ) - print( - f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" - ) - print( - f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and name" - ) - - def _format_func_name(func): - """Format function name with colors.""" - return ( - f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" - f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" - f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" - ) - - def _print_top_functions(stats_list, title, key_func, format_line, n=3): - """Print top N functions sorted by key_func with formatted output.""" - print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") - sorted_stats = sorted(stats_list, key=key_func, reverse=True) - for stat in sorted_stats[:n]: - if line := format_line(stat): - print(f" {line}") - - # Print summary of interesting functions if enabled - if show_summary and stats_list: - print( - f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" - ) - - # Aggregate stats by fully qualified function name (ignoring line numbers) - func_aggregated = {} - for ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats_list: - # Use filename:function_name as the key to get fully qualified name - qualified_name = f"{func[0]}:{func[2]}" - if 
qualified_name not in func_aggregated: - func_aggregated[qualified_name] = [ - 0, - 0, - 0, - 0, - ] # direct_calls, cumulative_calls, total_time, cumulative_time - func_aggregated[qualified_name][0] += direct_calls - func_aggregated[qualified_name][1] += cumulative_calls - func_aggregated[qualified_name][2] += total_time - func_aggregated[qualified_name][3] += cumulative_time - - # Convert aggregated data back to list format for processing - aggregated_stats = [] - for qualified_name, ( - prim_calls, - total_calls, - total_time, - cumulative_time, - ) in func_aggregated.items(): - # Parse the qualified name back to filename and function name - if ":" in qualified_name: - filename, func_name = qualified_name.rsplit(":", 1) - else: - filename, func_name = "", qualified_name - # Create a dummy func tuple with filename and function name for display - dummy_func = (filename, "", func_name) - aggregated_stats.append( - ( - dummy_func, - prim_calls, - total_calls, - total_time, - cumulative_time, - {}, - ) - ) - - # Determine best units for summary metrics - max_total_time = max( - (total_time for _, _, _, total_time, _, _ in aggregated_stats), - default=0, - ) - max_cumulative_time = max( - ( - cumulative_time - for _, _, _, _, cumulative_time, _ in aggregated_stats - ), - default=0, - ) - - total_unit, total_scale = _determine_best_unit(max_total_time) - cumulative_unit, cumulative_scale = _determine_best_unit( - max_cumulative_time - ) - - # Functions with highest direct/cumulative ratio (hot spots) - def format_hotspots(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if direct_calls > 0 and cumulative_calls > 0: - ratio = direct_calls / cumulative_calls - direct_pct = ( - (direct_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - return ( - f"{ratio:.3f} direct/cumulative ratio, " - f"{direct_pct:.1f}% direct samples: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with 
Highest Direct/Cumulative Ratio (Hot Spots)", - key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, - format_line=format_hotspots, - ) - - # Functions with highest call frequency (cumulative/direct difference) - def format_call_frequency(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if cumulative_calls > direct_calls: - call_frequency = cumulative_calls - direct_calls - cum_pct = ( - (cumulative_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - return ( - f"{call_frequency:d} indirect calls, " - f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Call Frequency (Indirect Calls)", - key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) - format_line=format_call_frequency, - ) - - # Functions with highest cumulative-to-direct multiplier (call magnification) - def format_call_magnification(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if direct_calls > 0 and cumulative_calls > direct_calls: - multiplier = cumulative_calls / direct_calls - indirect_calls = cumulative_calls - direct_calls - return ( - f"{multiplier:.1f}x call magnification, " - f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Call Magnification (Cumulative/Direct)", - key_func=lambda x: (x[2] / x[1]) - if x[1] > 0 - else 0, # Sort by cumulative/direct ratio - format_line=format_call_magnification, - ) - - def sample( pid, + collector, *, - sort=2, - sample_interval_usec=100, duration_sec=10, - filename=None, all_threads=False, - limit=None, - show_summary=True, - output_format="pstats", realtime_stats=False, mode=PROFILING_MODE_WALL, native=False, gc=True, ): + """Sample a process using the provided collector. 
+ + Args: + pid: Process ID to sample + collector: Collector instance to use for gathering samples + duration_sec: How long to sample for (seconds) + all_threads: Whether to sample all threads + realtime_stats: Whether to print real-time sampling statistics + mode: Profiling mode - WALL (all samples), CPU (only when on CPU), + GIL (only when holding GIL), ALL (includes GIL and CPU status) + native: Whether to include native frames + gc: Whether to include GC frames + + Returns: + The collector with collected samples + """ + # Get sample interval from collector + sample_interval_usec = collector.sample_interval_usec + # PROFILING_MODE_ALL implies no skipping at all if mode == PROFILING_MODE_ALL: skip_non_matching_threads = False - skip_idle = False else: - # Determine skip settings based on output format and mode - skip_non_matching_threads = output_format != "gecko" - skip_idle = mode != PROFILING_MODE_WALL + # For most modes, skip non-matching threads + # Gecko collector overrides this by setting skip_idle=False + skip_non_matching_threads = True profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads, mode=mode, native=native, gc=gc, + pid, + sample_interval_usec, + all_threads=all_threads, + mode=mode, + native=native, + gc=gc, skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats - collector = None - match output_format: - case "pstats": - collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle) - case "collapsed": - collector = CollapsedStackCollector(skip_idle=skip_idle) - filename = filename or f"collapsed.{pid}.txt" - case "flamegraph": - collector = FlamegraphCollector(skip_idle=skip_idle) - filename = filename or f"flamegraph.{pid}.html" - case "gecko": - # Gecko format never skips idle threads to show full thread states - collector = GeckoCollector(skip_idle=False) - filename = filename or f"gecko.{pid}.json" - case _: - raise ValueError(f"Invalid output format: 
{output_format}") - + # Run the sampling profiler.sample(collector, duration_sec) - if output_format == "pstats" and not filename: - stats = pstats.SampledStats(collector).strip_dirs() - if not stats.stats: - print("No samples were collected.") - if mode == PROFILING_MODE_CPU: - print("This can happen in CPU mode when all threads are idle.") - else: - print_sampled_stats( - stats, sort, limit, show_summary, sample_interval_usec - ) - else: - collector.export(filename) + return collector -def _validate_collapsed_format_args(args, parser): - # Check for incompatible pstats options - invalid_opts = [] - - # Get list of pstats-specific options - pstats_options = {"sort": None, "limit": None, "no_summary": False} - - # Find the default values from the argument definitions - for action in parser._actions: - if action.dest in pstats_options and hasattr(action, "default"): - pstats_options[action.dest] = action.default - - # Check if any pstats-specific options were provided by comparing with defaults - for opt, default in pstats_options.items(): - if getattr(args, opt) != default: - invalid_opts.append(opt.replace("no_", "")) - - if invalid_opts: - parser.error( - f"The following options are only valid with --pstats format: {', '.join(invalid_opts)}" - ) - - # Set default output filename for collapsed format only if we have a PID - # For module/script execution, this will be set later with the subprocess PID - if not args.outfile and args.pid is not None: - args.outfile = f"collapsed.{args.pid}.txt" - - -def wait_for_process_and_sample(pid, sort_value, args): - """Sample the process immediately since it has already signaled readiness.""" - # Set default filename with subprocess PID if not already set - filename = args.outfile - if not filename: - if args.format == "collapsed": - filename = f"collapsed.{pid}.txt" - elif args.format == "gecko": - filename = f"gecko.{pid}.json" +def sample_live( + pid, + collector, + *, + duration_sec=10, + all_threads=False, + 
realtime_stats=False, + mode=PROFILING_MODE_WALL, + native=False, + gc=True, +): + """Sample a process in live/interactive mode with curses TUI. + + Args: + pid: Process ID to sample + collector: LiveStatsCollector instance + duration_sec: How long to sample for (seconds) + all_threads: Whether to sample all threads + realtime_stats: Whether to print real-time sampling statistics + mode: Profiling mode - WALL (all samples), CPU (only when on CPU), + GIL (only when holding GIL), ALL (includes GIL and CPU status) + native: Whether to include native frames + gc: Whether to include GC frames + + Returns: + The collector with collected samples + """ + import curses + + # Get sample interval from collector + sample_interval_usec = collector.sample_interval_usec - mode = _parse_mode(args.mode) + # PROFILING_MODE_ALL implies no skipping at all + if mode == PROFILING_MODE_ALL: + skip_non_matching_threads = False + else: + skip_non_matching_threads = True - sample( + profiler = SampleProfiler( pid, - sort=sort_value, - sample_interval_usec=args.interval, - duration_sec=args.duration, - filename=filename, - all_threads=args.all_threads, - limit=args.limit, - show_summary=not args.no_summary, - output_format=args.format, - realtime_stats=args.realtime_stats, + sample_interval_usec, + all_threads=all_threads, mode=mode, - native=args.native, - gc=args.gc, - ) - - -def main(): - # Create the main parser - parser = argparse.ArgumentParser( - description=_HELP_DESCRIPTION, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - - # Target selection - target_group = parser.add_mutually_exclusive_group(required=False) - target_group.add_argument( - "-p", "--pid", type=int, help="Process ID to sample" - ) - target_group.add_argument( - "-m", "--module", - help="Run and profile a module as python -m module [ARGS...]" - ) - parser.add_argument( - "args", - nargs=argparse.REMAINDER, - help="Script to run and profile, with optional arguments" - ) - - # Sampling options - 
sampling_group = parser.add_argument_group("Sampling configuration") - sampling_group.add_argument( - "-i", - "--interval", - type=int, - default=100, - help="Sampling interval in microseconds (default: 100)", - ) - sampling_group.add_argument( - "-d", - "--duration", - type=int, - default=10, - help="Sampling duration in seconds (default: 10)", - ) - sampling_group.add_argument( - "-a", - "--all-threads", - action="store_true", - help="Sample all threads in the process instead of just the main thread", - ) - sampling_group.add_argument( - "--realtime-stats", - action="store_true", - help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", - ) - sampling_group.add_argument( - "--native", - action="store_true", - help="Include artificial \"\" frames to denote calls to non-Python code.", - ) - sampling_group.add_argument( - "--no-gc", - action="store_false", - dest="gc", - help="Don't include artificial \"\" frames to denote active garbage collection.", - ) - - # Mode options - mode_group = parser.add_argument_group("Mode options") - mode_group.add_argument( - "--mode", - choices=["wall", "cpu", "gil"], - default="wall", - help="Sampling mode: wall (all threads), cpu (only CPU-running threads), gil (only GIL-holding threads) (default: wall)", - ) - - # Output format selection - output_group = parser.add_argument_group("Output options") - output_format = output_group.add_mutually_exclusive_group() - output_format.add_argument( - "--pstats", - action="store_const", - const="pstats", - dest="format", - default="pstats", - help="Generate pstats output (default)", - ) - output_format.add_argument( - "--collapsed", - action="store_const", - const="collapsed", - dest="format", - help="Generate collapsed stack traces for flamegraphs", - ) - output_format.add_argument( - "--flamegraph", - action="store_const", - const="flamegraph", - dest="format", - help="Generate HTML flamegraph visualization", - ) - output_format.add_argument( - "--gecko", - 
action="store_const", - const="gecko", - dest="format", - help="Generate Gecko format for Firefox Profiler", - ) - - output_group.add_argument( - "-o", - "--outfile", - help="Save output to a file (if omitted, prints to stdout for pstats, " - "or saves to collapsed..txt or flamegraph..html for the " - "respective output formats)" - ) - - # pstats-specific options - pstats_group = parser.add_argument_group("pstats format options") - sort_group = pstats_group.add_mutually_exclusive_group() - sort_group.add_argument( - "--sort-nsamples", - action="store_const", - const=0, - dest="sort", - help="Sort by number of direct samples (nsamples column)", - ) - sort_group.add_argument( - "--sort-tottime", - action="store_const", - const=1, - dest="sort", - help="Sort by total time (tottime column)", - ) - sort_group.add_argument( - "--sort-cumtime", - action="store_const", - const=2, - dest="sort", - help="Sort by cumulative time (cumtime column, default)", - ) - sort_group.add_argument( - "--sort-sample-pct", - action="store_const", - const=3, - dest="sort", - help="Sort by sample percentage (sample%% column)", - ) - sort_group.add_argument( - "--sort-cumul-pct", - action="store_const", - const=4, - dest="sort", - help="Sort by cumulative sample percentage (cumul%% column)", - ) - sort_group.add_argument( - "--sort-nsamples-cumul", - action="store_const", - const=5, - dest="sort", - help="Sort by cumulative samples (nsamples column, cumulative part)", - ) - sort_group.add_argument( - "--sort-name", - action="store_const", - const=-1, - dest="sort", - help="Sort by function name", - ) - - pstats_group.add_argument( - "-l", - "--limit", - type=int, - help="Limit the number of rows in the output", - default=15, - ) - pstats_group.add_argument( - "--no-summary", - action="store_true", - help="Disable the summary section in the output", + native=native, + gc=gc, + skip_non_matching_threads=skip_non_matching_threads ) + profiler.realtime_stats = realtime_stats - args = 
parser.parse_args() - - # Validate format-specific arguments - if args.format in ("collapsed", "gecko"): - _validate_collapsed_format_args(args, parser) - - # Validate that --mode is not used with --gecko - if args.format == "gecko" and args.mode != "wall": - parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).") - - sort_value = args.sort if args.sort is not None else 2 - - if args.module is not None and not args.module: - parser.error("argument -m/--module: expected one argument") - - # Validate that we have exactly one target type - # Note: args can be present with -m (module arguments) but not as standalone script - has_pid = args.pid is not None - has_module = args.module is not None - has_script = bool(args.args) and args.module is None - - target_count = sum([has_pid, has_module, has_script]) - - if target_count == 0: - parser.error("one of the arguments -p/--pid -m/--module or script name is required") - elif target_count > 1: - parser.error("only one target type can be specified: -p/--pid, -m/--module, or script") - - # Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice - if args.format == "gecko": - mode = PROFILING_MODE_ALL - else: - mode = _parse_mode(args.mode) - - if args.pid: - sample( - args.pid, - sample_interval_usec=args.interval, - duration_sec=args.duration, - filename=args.outfile, - all_threads=args.all_threads, - limit=args.limit, - sort=sort_value, - show_summary=not args.no_summary, - output_format=args.format, - realtime_stats=args.realtime_stats, - mode=mode, - native=args.native, - gc=args.gc, - ) - elif args.module or args.args: - if args.module: - cmd = (sys.executable, "-m", args.module, *args.args) - else: - cmd = (sys.executable, *args.args) - - # Use synchronized process startup - process = _run_with_sync(cmd) - - # Process has already signaled readiness, start sampling immediately + def curses_wrapper_func(stdscr): + 
collector.init_curses(stdscr) try: - wait_for_process_and_sample(process.pid, sort_value, args) + profiler.sample(collector, duration_sec) + # Mark as finished and keep the TUI running until user presses 'q' + collector.mark_finished() + # Keep processing input until user quits + while collector.running: + collector._handle_input() + time.sleep(0.05) # Small sleep to avoid busy waiting finally: - if process.poll() is None: - process.terminate() - try: - process.wait(timeout=2) - except subprocess.TimeoutExpired: - process.kill() - process.wait() + collector.cleanup_curses() + + try: + curses.wrapper(curses_wrapper_func) + except KeyboardInterrupt: + pass -if __name__ == "__main__": - main() + return collector diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 1436811976a16e..51d13a648bfa49 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -11,7 +11,8 @@ class StackTraceCollector(Collector): - def __init__(self, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False): + self.sample_interval_usec = sample_interval_usec self.skip_idle = skip_idle def collect(self, stack_frames, skip_idle=False): diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 293c3189589e36..11ffc6bf3a1bb5 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Tue Oct 14 13:46:01 2025 +# Autogenerated by Sphinx on Tue Nov 18 16:51:09 2025 # as part of the release process. topics = { @@ -1098,10 +1098,10 @@ class and instance attributes applies as for regular assignments. 'bltin-ellipsis-object': r'''The Ellipsis Object ******************* -This object is commonly used used to indicate that something is -omitted. It supports no special operations. There is exactly one -ellipsis object, named "Ellipsis" (a built-in name). -"type(Ellipsis)()" produces the "Ellipsis" singleton. 
+This object is commonly used to indicate that something is omitted. It +supports no special operations. There is exactly one ellipsis object, +named "Ellipsis" (a built-in name). "type(Ellipsis)()" produces the +"Ellipsis" singleton. It is written as "Ellipsis" or "...". @@ -4140,6 +4140,10 @@ def double(x): available for commands and command arguments, e.g. the current global and local names are offered as arguments of the "p" command. + +Command-line interface +====================== + You can also invoke "pdb" from the command line to debug other scripts. For example: @@ -4155,7 +4159,7 @@ def double(x): -c, --command To execute commands as if given in a ".pdbrc" file; see Debugger - Commands. + commands. Changed in version 3.2: Added the "-c" option. @@ -4376,7 +4380,7 @@ class pdb.Pdb(completekey='tab', stdin=None, stdout=None, skip=None, nosigint=Fa See the documentation for the functions explained above. -Debugger Commands +Debugger commands ================= The commands recognized by the debugger are listed below. Most @@ -5616,9 +5620,8 @@ class of the instance or a *non-virtual base class* thereof. The 2.71828 4.0 -Unlike in integer literals, leading zeros are allowed in the numeric -parts. For example, "077.010" is legal, and denotes the same number as -"77.10". +Unlike in integer literals, leading zeros are allowed. For example, +"077.010" is legal, and denotes the same number as "77.01". As in integer literals, single underscores may occur between digits to help readability: @@ -7435,9 +7438,8 @@ class body. A "SyntaxError" is raised if a variable is used or 2.71828 4.0 -Unlike in integer literals, leading zeros are allowed in the numeric -parts. For example, "077.010" is legal, and denotes the same number as -"77.10". +Unlike in integer literals, leading zeros are allowed. For example, +"077.010" is legal, and denotes the same number as "77.01". 
As in integer literals, single underscores may occur between digits to help readability: @@ -7685,9 +7687,8 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is ************************* *Objects* are Python’s abstraction for data. All data in a Python -program is represented by objects or by relations between objects. (In -a sense, and in conformance to Von Neumann’s model of a “stored -program computer”, code is also represented by objects.) +program is represented by objects or by relations between objects. +Even code is represented by objects. Every object has an identity, a type and a value. An object’s *identity* never changes once it has been created; you may think of it @@ -10301,6 +10302,17 @@ class is used in a class pattern with positional arguments, each follow uncased characters and lowercase characters only cased ones. Return "False" otherwise. + For example: + + >>> 'Spam, Spam, Spam'.istitle() + True + >>> 'spam, spam, spam'.istitle() + False + >>> 'SPAM, SPAM, SPAM'.istitle() + False + + See also "title()". + str.isupper() Return "True" if all cased characters [4] in the string are @@ -10663,6 +10675,8 @@ class is used in a class pattern with positional arguments, each >>> titlecase("they're bill's friends.") "They're Bill's Friends." + See also "istitle()". + str.translate(table, /) Return a copy of the string in which each character has been mapped @@ -12362,6 +12376,11 @@ class method object, it is transformed into an instance method object | | "X.__bases__" will be exactly equal to "(A, B, | | | C)". | +----------------------------------------------------+----------------------------------------------------+ +| type.__base__ | **CPython implementation detail:** The single base | +| | class in the inheritance chain that is responsible | +| | for the memory layout of instances. This attribute | +| | corresponds to "tp_base" at the C level. 
| ++----------------------------------------------------+----------------------------------------------------+ | type.__doc__ | The class’s documentation string, or "None" if | | | undefined. Not inherited by subclasses. | +----------------------------------------------------+----------------------------------------------------+ diff --git a/Lib/shelve.py b/Lib/shelve.py index 1010be1e09d702..9f6296667fdb6b 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -57,7 +57,6 @@ """ from pickle import DEFAULT_PROTOCOL, dumps, loads -from io import BytesIO import collections.abc diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 0f9c5c222250ae..9f8412fe9394eb 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -6956,28 +6956,13 @@ def test_std_streams_flushed_after_preload(self): if multiprocessing.get_start_method() != "forkserver": self.skipTest("forkserver specific test") - # Create a test module in the temporary directory on the child's path - # TODO: This can all be simplified once gh-126631 is fixed and we can - # use __main__ instead of a module. - dirname = os.path.join(self._temp_dir, 'preloaded_module') - init_name = os.path.join(dirname, '__init__.py') - os.mkdir(dirname) - with open(init_name, "w") as f: - cmd = '''if 1: - import sys - print('stderr', end='', file=sys.stderr) - print('stdout', end='', file=sys.stdout) - ''' - f.write(cmd) - name = os.path.join(os.path.dirname(__file__), 'mp_preload_flush.py') - env = {'PYTHONPATH': self._temp_dir} - _, out, err = test.support.script_helper.assert_python_ok(name, **env) + _, out, err = test.support.script_helper.assert_python_ok(name) # Check stderr first, as it is more likely to be useful to see in the # event of a failure. 
- self.assertEqual(err.decode().rstrip(), 'stderr') - self.assertEqual(out.decode().rstrip(), 'stdout') + self.assertEqual(err.decode().rstrip(), '__main____mp_main__') + self.assertEqual(out.decode().rstrip(), '__main____mp_main__') class MiscTestCase(unittest.TestCase): diff --git a/Lib/test/mp_preload_flush.py b/Lib/test/mp_preload_flush.py index 3501554d366a21..c195a9ef6b26fe 100644 --- a/Lib/test/mp_preload_flush.py +++ b/Lib/test/mp_preload_flush.py @@ -1,15 +1,11 @@ import multiprocessing import sys -modname = 'preloaded_module' +print(__name__, end='', file=sys.stderr) +print(__name__, end='', file=sys.stdout) if __name__ == '__main__': - if modname in sys.modules: - raise AssertionError(f'{modname!r} is not in sys.modules') multiprocessing.set_start_method('forkserver') - multiprocessing.set_forkserver_preload([modname]) for _ in range(2): p = multiprocessing.Process() p.start() p.join() -elif modname not in sys.modules: - raise AssertionError(f'{modname!r} is not in sys.modules') diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 1814a55b74ea0c..185aa3fce39149 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -90,6 +90,18 @@ def checkcall(self, obj, methodname, *args): args = self.fixtype(args) getattr(obj, methodname)(*args) + def _get_teststrings(self, charset, digits): + base = len(charset) + teststrings = set() + for i in range(base ** digits): + entry = [] + for j in range(digits): + i, m = divmod(i, base) + entry.append(charset[m]) + teststrings.add(''.join(entry)) + teststrings = [self.fixtype(ts) for ts in teststrings] + return teststrings + def test_count(self): self.checkequal(3, 'aaa', 'count', 'a') self.checkequal(0, 'aaa', 'count', 'b') @@ -130,17 +142,7 @@ def test_count(self): # For a variety of combinations, # verify that str.count() matches an equivalent function # replacing all occurrences and then differencing the string lengths - charset = ['', 'a', 'b'] - digits = 7 - base = len(charset) - 
teststrings = set() - for i in range(base ** digits): - entry = [] - for j in range(digits): - i, m = divmod(i, base) - entry.append(charset[m]) - teststrings.add(''.join(entry)) - teststrings = [self.fixtype(ts) for ts in teststrings] + teststrings = self._get_teststrings(['', 'a', 'b'], 7) for i in teststrings: n = len(i) for j in teststrings: @@ -197,17 +199,7 @@ def test_find(self): # For a variety of combinations, # verify that str.find() matches __contains__ # and that the found substring is really at that location - charset = ['', 'a', 'b', 'c'] - digits = 5 - base = len(charset) - teststrings = set() - for i in range(base ** digits): - entry = [] - for j in range(digits): - i, m = divmod(i, base) - entry.append(charset[m]) - teststrings.add(''.join(entry)) - teststrings = [self.fixtype(ts) for ts in teststrings] + teststrings = self._get_teststrings(['', 'a', 'b', 'c'], 5) for i in teststrings: for j in teststrings: loc = i.find(j) @@ -244,17 +236,7 @@ def test_rfind(self): # For a variety of combinations, # verify that str.rfind() matches __contains__ # and that the found substring is really at that location - charset = ['', 'a', 'b', 'c'] - digits = 5 - base = len(charset) - teststrings = set() - for i in range(base ** digits): - entry = [] - for j in range(digits): - i, m = divmod(i, base) - entry.append(charset[m]) - teststrings.add(''.join(entry)) - teststrings = [self.fixtype(ts) for ts in teststrings] + teststrings = self._get_teststrings(['', 'a', 'b', 'c'], 5) for i in teststrings: for j in teststrings: loc = i.rfind(j) @@ -295,6 +277,19 @@ def test_index(self): else: self.checkraises(TypeError, 'hello', 'index', 42) + # For a variety of combinations, + # verify that str.index() matches __contains__ + # and that the found substring is really at that location + teststrings = self._get_teststrings(['', 'a', 'b', 'c'], 5) + for i in teststrings: + for j in teststrings: + if j in i: + loc = i.index(j) + self.assertGreaterEqual(loc, 0) + 
self.assertEqual(i[loc:loc+len(j)], j) + else: + self.assertRaises(ValueError, i.index, j) + def test_rindex(self): self.checkequal(12, 'abcdefghiabc', 'rindex', '') self.checkequal(3, 'abcdefghiabc', 'rindex', 'def') @@ -321,6 +316,19 @@ def test_rindex(self): else: self.checkraises(TypeError, 'hello', 'rindex', 42) + # For a variety of combinations, + # verify that str.rindex() matches __contains__ + # and that the found substring is really at that location + teststrings = self._get_teststrings(['', 'a', 'b', 'c'], 5) + for i in teststrings: + for j in teststrings: + if j in i: + loc = i.rindex(j) + self.assertGreaterEqual(loc, 0) + self.assertEqual(i[loc:loc+len(j)], j) + else: + self.assertRaises(ValueError, i.rindex, j) + def test_find_periodic_pattern(self): """Cover the special path for periodic patterns.""" def reference_find(p, s): diff --git a/Lib/test/test__colorize.py b/Lib/test/test__colorize.py index 25012466840f18..67e0595943d356 100644 --- a/Lib/test/test__colorize.py +++ b/Lib/test/test__colorize.py @@ -166,6 +166,17 @@ def test_colorized_detection_checks_for_file(self): file.isatty.return_value = False self.assertEqual(_colorize.can_colorize(file=file), False) + # The documentation for file.fileno says: + # > An OSError is raised if the IO object does not use a file descriptor. 
+ # gh-141570: Check OSError is caught and handled + with unittest.mock.patch("os.isatty", side_effect=ZeroDivisionError): + file = unittest.mock.MagicMock() + file.fileno.side_effect = OSError + file.isatty.return_value = True + self.assertEqual(_colorize.can_colorize(file=file), True) + file.isatty.return_value = False + self.assertEqual(_colorize.can_colorize(file=file), False) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 3a8be68a5468b0..ef90d4bcbb2a36 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -581,13 +581,22 @@ class TestOptionalsShortLong(ParserTestCase): class TestOptionalsDest(ParserTestCase): """Tests various means of setting destination""" - argument_signatures = [Sig('--foo-bar'), Sig('--baz', dest='zabbaz')] + argument_signatures = [ + Sig('-x', '-foobar', '--foo-bar', '-barfoo', '-X'), + Sig('--baz', dest='zabbaz'), + Sig('-y', '-qux', '-Y'), + Sig('-z'), + ] failures = ['a'] successes = [ - ('--foo-bar f', NS(foo_bar='f', zabbaz=None)), - ('--baz g', NS(foo_bar=None, zabbaz='g')), - ('--foo-bar h --baz i', NS(foo_bar='h', zabbaz='i')), - ('--baz j --foo-bar k', NS(foo_bar='k', zabbaz='j')), + ('--foo-bar f', NS(foo_bar='f', zabbaz=None, qux=None, z=None)), + ('-x f', NS(foo_bar='f', zabbaz=None, qux=None, z=None)), + ('--baz g', NS(foo_bar=None, zabbaz='g', qux=None, z=None)), + ('--foo-bar h --baz i', NS(foo_bar='h', zabbaz='i', qux=None, z=None)), + ('--baz j --foo-bar k', NS(foo_bar='k', zabbaz='j', qux=None, z=None)), + ('-qux l', NS(foo_bar=None, zabbaz=None, qux='l', z=None)), + ('-y l', NS(foo_bar=None, zabbaz=None, qux='l', z=None)), + ('-z m', NS(foo_bar=None, zabbaz=None, qux=None, z='m')), ] @@ -796,6 +805,76 @@ def test_invalid_name(self): self.assertEqual(str(cm.exception), "invalid option name '--no-foo' for BooleanOptionalAction") +class TestBooleanOptionalActionSingleDash(ParserTestCase): + """Tests BooleanOptionalAction with single 
dash""" + + argument_signatures = [ + Sig('-foo', '-x', action=argparse.BooleanOptionalAction), + ] + failures = ['--foo', '--no-foo', '-no-foo', '-no-x', '-nox'] + successes = [ + ('', NS(foo=None)), + ('-foo', NS(foo=True)), + ('-nofoo', NS(foo=False)), + ('-x', NS(foo=True)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser() + with self.assertRaises(ValueError) as cm: + parser.add_argument('-nofoo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '-nofoo' for BooleanOptionalAction") + +class TestBooleanOptionalActionAlternatePrefixChars(ParserTestCase): + """Tests BooleanOptionalAction with custom prefixes""" + + parser_signature = Sig(prefix_chars='+-', add_help=False) + argument_signatures = [Sig('++foo', action=argparse.BooleanOptionalAction)] + failures = ['--foo', '--no-foo'] + successes = [ + ('', NS(foo=None)), + ('++foo', NS(foo=True)), + ('++no-foo', NS(foo=False)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser(prefix_chars='+/') + with self.assertRaisesRegex(ValueError, + 'BooleanOptionalAction.*is not valid for positional arguments'): + parser.add_argument('--foo', action=argparse.BooleanOptionalAction) + with self.assertRaises(ValueError) as cm: + parser.add_argument('++no-foo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '++no-foo' for BooleanOptionalAction") + +class TestBooleanOptionalActionSingleAlternatePrefixChar(ParserTestCase): + """Tests BooleanOptionalAction with single alternate prefix char""" + + parser_signature = Sig(prefix_chars='+/', add_help=False) + argument_signatures = [ + Sig('+foo', '+x', action=argparse.BooleanOptionalAction), + ] + failures = ['++foo', '++no-foo', '++nofoo', + '-no-foo', '-nofoo', '+no-foo', '-nofoo', + '+no-x', '+nox', '-no-x', '-nox'] + successes = [ + ('', NS(foo=None)), + ('+foo', NS(foo=True)), + ('+nofoo', NS(foo=False)), + ('+x', NS(foo=True)), + ] 
+ + def test_invalid_name(self): + parser = argparse.ArgumentParser(prefix_chars='+/') + with self.assertRaisesRegex(ValueError, + 'BooleanOptionalAction.*is not valid for positional arguments'): + parser.add_argument('-foo', action=argparse.BooleanOptionalAction) + with self.assertRaises(ValueError) as cm: + parser.add_argument('+nofoo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '+nofoo' for BooleanOptionalAction") + class TestBooleanOptionalActionRequired(ParserTestCase): """Tests BooleanOptionalAction required""" @@ -5611,6 +5690,8 @@ def test_invalid_option_strings(self): self.assertTypeError('-', errmsg='dest= is required') self.assertTypeError('--', errmsg='dest= is required') self.assertTypeError('---', errmsg='dest= is required') + self.assertTypeError('-', '--', '---', + errmsg="dest= is required for options like '-', '--', '---'") def test_invalid_prefix(self): self.assertValueError('--foo', '+foo', @@ -7207,6 +7288,8 @@ def test_argparse_color(self): short_b = self.theme.short_option label_b = self.theme.label pos_b = self.theme.action + default = self.theme.default + default_value = self.theme.default_value reset = self.theme.reset # Act @@ -7233,17 +7316,17 @@ def test_argparse_color(self): {heading}options:{reset} {short_b}-h{reset}, {long_b}--help{reset} show this help message and exit - {short_b}-v{reset}, {long_b}--verbose{reset} more spam (default: False) - {short_b}-q{reset}, {long_b}--quiet{reset} less spam (default: False) + {short_b}-v{reset}, {long_b}--verbose{reset} more spam {default}(default: {default_value}False{default}){reset} + {short_b}-q{reset}, {long_b}--quiet{reset} less spam {default}(default: {default_value}False{default}){reset} {short_b}-o{reset}, {long_b}--optional1{reset} {long_b}--optional2{reset} {label_b}OPTIONAL2{reset} - pick one (default: None) + pick one {default}(default: {default_value}None{default}){reset} {long_b}--optional3{reset} {label_b}{{X,Y,Z}}{reset} 
- {long_b}--optional4{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) - {long_b}--optional5{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) - {long_b}--optional6{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) + {long_b}--optional4{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} + {long_b}--optional5{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} + {long_b}--optional6{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} {short_b}-p{reset}, {long_b}--optional7{reset} {label_b}{{Aaaaa,Bbbbb,Ccccc,Ddddd}}{reset} - pick one (default: None) + pick one {default}(default: {default_value}None{default}){reset} {short_b}+f{reset} {label_b}F{reset} {long_b}++bar{reset} {label_b}BAR{reset} {long_b}-+baz{reset} {label_b}BAZ{reset} diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 86898bfcab9135..a6cf899fa51e75 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1524,6 +1524,32 @@ def test_take_bytes(self): self.assertRaises(BufferError, ba.take_bytes) self.assertEqual(ba.take_bytes(), b'abc') + @support.cpython_only # tests an implementation detail + def test_take_bytes_optimization(self): + # Validate optimization around taking lots of little chunks out of a + # much bigger buffer. Save work by only copying a little rather than + # moving a lot. + ba = bytearray(b'abcdef' + b'0' * 1000) + start_alloc = ba.__alloc__() + + # Take two bytes at a time, checking alloc doesn't change. 
+ self.assertEqual(ba.take_bytes(2), b'ab') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 4 + 1000) + self.assertEqual(ba.take_bytes(2), b'cd') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 2 + 1000) + self.assertEqual(ba.take_bytes(2), b'ef') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 0 + 1000) + self.assertEqual(ba.__alloc__(), start_alloc) + + # Take over half, alloc shrinks to exact size. + self.assertEqual(ba.take_bytes(501), b'0' * 501) + self.assertEqual(len(ba), 499) + bytes_header_size = sys.getsizeof(b'') + self.assertEqual(ba.__alloc__(), 499 + bytes_header_size) + def test_setitem(self): def setitem_as_mapping(b, i, val): b[i] = val @@ -2629,6 +2655,10 @@ def zfill(b, a): c = a.zfill(0x400000) assert not c or c[-1] not in (0xdd, 0xcd) + def resize(b, a): # MODIFIES! + b.wait() + a.resize(10) + def take_bytes(b, a): # MODIFIES! b.wait() c = a.take_bytes() @@ -2702,6 +2732,8 @@ def check(funcs, a=None, *args): check([clear] + [startswith] * 10) check([clear] + [strip] * 10) + check([clear] + [resize] * 10) + check([clear] + [take_bytes] * 10) check([take_bytes_n] * 10, bytearray(b'0123456789' * 0x400)) check([take_bytes_n] * 10, bytearray(b'0123456789' * 5)) diff --git a/Lib/test/test_capi/test_module.py b/Lib/test/test_capi/test_module.py index 7ec23e637d7de6..823e2ab6b2ef0d 100644 --- a/Lib/test/test_capi/test_module.py +++ b/Lib/test/test_capi/test_module.py @@ -3,7 +3,7 @@ import unittest import types -from test.support import import_helper, subTests +from test.support import import_helper, subTests, requires_gil_enabled # Skip this test if the _testcapi module isn't available. 
_testcapi = import_helper.import_module('_testcapi') @@ -25,6 +25,7 @@ def def_and_token(mod): ) class TestModFromSlotsAndSpec(unittest.TestCase): + @requires_gil_enabled("empty slots re-enable GIL") def test_empty(self): mod = _testcapi.module_from_slots_empty(FakeSpec()) self.assertIsInstance(mod, types.ModuleType) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 25372fee58e0d7..51234a2e40f54f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2480,8 +2480,6 @@ def testfunc(n): testfunc(_testinternalcapi.TIER2_THRESHOLD) - ex = get_first_executor(testfunc) - assert ex is not None """)) def test_pop_top_specialize_none(self): @@ -2664,6 +2662,38 @@ def f(): f" {executor} at offset {idx} rather" f" than expected _EXIT_TRACE") + def test_enter_executor_valid_op_arg(self): + script_helper.assert_python_ok("-c", textwrap.dedent(""" + import sys + sys.setrecursionlimit(30) # reduce time of the run + + str_v1 = '' + tuple_v2 = (None, None, None, None, None) + small_int_v3 = 4 + + def f1(): + + for _ in range(10): + abs(0) + + tuple_v2[small_int_v3] + tuple_v2[small_int_v3] + tuple_v2[small_int_v3] + + def recursive_wrapper_4569(): + str_v1 > str_v1 + str_v1 > str_v1 + str_v1 > str_v1 + recursive_wrapper_4569() + + recursive_wrapper_4569() + + for i_f1 in range(19000): + try: + f1() + except RecursionError: + pass + """)) def global_identity(x): diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index 467ec09d99e462..cfef24727e8c82 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -672,7 +672,7 @@ def __eq__(self, other): def test_reduce_5tuple(self): class C(dict): def __reduce__(self): - return (C, (), self.__dict__, None, self.items()) + return (C, (), self.__dict__, None, iter(self.items())) def __eq__(self, other): return (dict(self) == dict(other) and self.__dict__ == other.__dict__) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 
14f94285d3f3c2..82a48ad4d1aced 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1329,18 +1329,17 @@ class D(object): self.assertNotHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) class W(object): __slots__ = ["__weakref__"] a = W() self.assertHasAttr(a, "__weakref__") self.assertNotHasAttr(a, "__dict__") - try: + with self.assertRaises(AttributeError): a.foo = 42 - except AttributeError: - pass - else: - self.fail("shouldn't be allowed to set a.foo") + self.assertIs(weakref.ref(a)(), a) class C1(W, D): __slots__ = [] @@ -1349,6 +1348,7 @@ class C1(W, D): self.assertHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + self.assertIs(weakref.ref(a)(), a) class C2(D, W): __slots__ = [] @@ -1357,6 +1357,77 @@ class C2(D, W): self.assertHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + self.assertIs(weakref.ref(a)(), a) + + @unittest.skipIf(_testcapi is None, 'need the _testcapi module') + def test_slots_special_before_items(self): + class D(_testcapi.HeapCCollection): + __slots__ = ["__dict__"] + a = D(1, 2, 3) + self.assertHasAttr(a, "__dict__") + self.assertNotHasAttr(a, "__weakref__") + a.foo = 42 + self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) + del a.__dict__ + self.assertNotHasAttr(a, "foo") + self.assertEqual(a.__dict__, {}) + self.assertEqual(list(a), [1, 2, 3]) + + class W(_testcapi.HeapCCollection): + __slots__ = ["__weakref__"] + a = W(1, 2, 3) + self.assertHasAttr(a, "__weakref__") + self.assertNotHasAttr(a, "__dict__") + with self.assertRaises(AttributeError): + a.foo = 42 + self.assertIs(weakref.ref(a)(), a) + + with self.assertRaises(TypeError): + class X(_testcapi.HeapCCollection): + __slots__ = ['x'] + + with self.assertRaises(TypeError): + class X(_testcapi.HeapCCollection): + __slots__ = ['__dict__', 'x'] + + @support.subTests(('base', 
'arg'), [ + (tuple, (1, 2, 3)), + (int, 9876543210**2), + (bytes, b'ab'), + ]) + def test_slots_special_after_items(self, base, arg): + class D(base): + __slots__ = ["__dict__"] + a = D(arg) + self.assertHasAttr(a, "__dict__") + self.assertNotHasAttr(a, "__weakref__") + a.foo = 42 + self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) + del a.__dict__ + self.assertNotHasAttr(a, "foo") + self.assertEqual(a.__dict__, {}) + self.assertEqual(a, base(arg)) + + class W(base): + __slots__ = ["__weakref__"] + a = W(arg) + self.assertHasAttr(a, "__weakref__") + self.assertNotHasAttr(a, "__dict__") + with self.assertRaises(AttributeError): + a.foo = 42 + self.assertIs(weakref.ref(a)(), a) + self.assertEqual(a, base(arg)) + + with self.assertRaises(TypeError): + class X(base): + __slots__ = ['x'] + with self.assertRaises(TypeError): + class X(base): + __slots__ = ['__dict__', 'x'] def test_slots_special2(self): # Testing __qualname__ and __classcell__ in __slots__ diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 1078796eae84e2..b536794122787d 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -241,21 +241,7 @@ def test_repeated_init_and_inittab(self): def test_create_module_from_initfunc(self): out, err = self.run_embedded_interpreter("test_create_module_from_initfunc") - if support.Py_GIL_DISABLED: - # the test imports a singlephase init extension, so it emits a warning - # under the free-threaded build - expected_runtime_warning = ( - "RuntimeWarning: The global interpreter lock (GIL)" - " has been enabled to load module 'embedded_ext'" - ) - filtered_err_lines = [ - line - for line in err.strip().splitlines() - if expected_runtime_warning not in line - ] - self.assertEqual(filtered_err_lines, []) - else: - self.assertEqual(err, "") + self.assertEqual(self._nogil_filtered_err(err, "embedded_ext"), "") self.assertEqual(out, "\n" "my_test_extension.executed='yes'\n" @@ -264,6 +250,26 @@ def 
test_create_module_from_initfunc(self): "embedded_ext.executed='yes'\n" ) + def test_inittab_submodule_multiphase(self): + out, err = self.run_embedded_interpreter("test_inittab_submodule_multiphase") + self.assertEqual(err, "") + self.assertEqual(out, + "\n" + "\n" + "Hello from sub-module\n" + "mp_pkg.mp_submod.mp_submod_exec_slot_ran='yes'\n" + "mp_pkg.mp_pkg_exec_slot_ran='yes'\n" + ) + + def test_inittab_submodule_singlephase(self): + out, err = self.run_embedded_interpreter("test_inittab_submodule_singlephase") + self.assertEqual(self._nogil_filtered_err(err, "sp_pkg"), "") + self.assertEqual(out, + "\n" + "\n" + "Hello from sub-module\n" + ) + def test_forced_io_encoding(self): # Checks forced configuration of embedded interpreter IO streams env = dict(os.environ, PYTHONIOENCODING="utf-8:surrogateescape") @@ -541,6 +547,24 @@ def test_getargs_reset_static_parser(self): out, err = self.run_embedded_interpreter("test_repeated_init_exec", code) self.assertEqual(out, '1\n2\n3\n' * INIT_LOOPS) + @staticmethod + def _nogil_filtered_err(err: str, mod_name: str) -> str: + if not support.Py_GIL_DISABLED: + return err + + # the test imports a singlephase init extension, so it emits a warning + # under the free-threaded build + expected_runtime_warning = ( + "RuntimeWarning: The global interpreter lock (GIL)" + f" has been enabled to load module '{mod_name}'" + ) + filtered_err_lines = [ + line + for line in err.strip().splitlines() + if expected_runtime_warning not in line + ] + return "\n".join(filtered_err_lines) + def config_dev_mode(preconfig, config): preconfig['allocator'] = PYMEM_ALLOCATOR_DEBUG diff --git a/Lib/test/test_free_threading/test_capi.py b/Lib/test/test_free_threading/test_capi.py new file mode 100644 index 00000000000000..146d7cfc97adb7 --- /dev/null +++ b/Lib/test/test_free_threading/test_capi.py @@ -0,0 +1,47 @@ +import ctypes +import sys +import unittest + +from test.support import threading_helper +from test.support.threading_helper import 
run_concurrently + + +_PyImport_AddModuleRef = ctypes.pythonapi.PyImport_AddModuleRef +_PyImport_AddModuleRef.argtypes = (ctypes.c_char_p,) +_PyImport_AddModuleRef.restype = ctypes.py_object + + +@threading_helper.requires_working_threading() +class TestImportCAPI(unittest.TestCase): + def test_pyimport_addmoduleref_thread_safe(self): + # gh-137422: Concurrent calls to PyImport_AddModuleRef with the same + # module name must return the same module object. + + NUM_ITERS = 10 + NTHREADS = 4 + + module_name = f"test_free_threading_addmoduleref_{id(self)}" + module_name_bytes = module_name.encode() + sys.modules.pop(module_name, None) + results = [] + + def worker(): + module = _PyImport_AddModuleRef(module_name_bytes) + results.append(module) + + for _ in range(NUM_ITERS): + try: + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(len(results), NTHREADS) + reference = results[0] + for module in results[1:]: + self.assertIs(module, reference) + self.assertIn(module_name, sys.modules) + self.assertIs(sys.modules[module_name], reference) + finally: + results.clear() + sys.modules.pop(module_name, None) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_csv.py b/Lib/test/test_free_threading/test_csv.py new file mode 100644 index 00000000000000..beb4510a1281b8 --- /dev/null +++ b/Lib/test/test_free_threading/test_csv.py @@ -0,0 +1,50 @@ +import csv +import io +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestCSV(unittest.TestCase): + def test_concurrent_reader_next(self): + input_rows = [f"{i},{i},{i}" for i in range(50)] + input_stream = io.StringIO("\n".join(input_rows)) + reader = csv.reader(input_stream) + output_rows = [] + + def read_row(): + for row in reader: + self.assertEqual(len(row), 3) + output_rows.append(",".join(row)) + + 
run_concurrently(worker_func=read_row, nthreads=NTHREADS) + self.assertSetEqual(set(input_rows), set(output_rows)) + + def test_concurrent_writer_writerow(self): + output_stream = io.StringIO() + writer = csv.writer(output_stream) + row_per_thread = 10 + expected_rows = [] + + def write_row(): + for i in range(row_per_thread): + writer.writerow([i, i, i]) + expected_rows.append(f"{i},{i},{i}") + + run_concurrently(worker_func=write_row, nthreads=NTHREADS) + + # Rewind to the start of the stream and parse the rows + output_stream.seek(0) + output_rows = [line.strip() for line in output_stream.readlines()] + + self.assertEqual(len(output_rows), NTHREADS * row_per_thread) + self.assertListEqual(sorted(output_rows), sorted(expected_rows)) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index 407bf7cbdee917..2cd6e7b035ecb4 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -35,10 +35,10 @@ def work(self, n, funcs): return n return self.work(n - 1, funcs) + self.work(n - 2, funcs) - def start_work(self, n, funcs): + def start_work(self, n, funcs, barrier): # With the GIL builds we need to make sure that the hooks have # a chance to run as it's possible to run w/o releasing the GIL. 
- time.sleep(0.1) + barrier.wait() self.work(n, funcs) def after_test(self): @@ -53,14 +53,16 @@ def test_instrumentation(self): exec("def f(): pass", x) funcs.append(x["f"]) + barrier = Barrier(self.thread_count + 1) threads = [] for i in range(self.thread_count): # Each thread gets a copy of the func list to avoid contention - t = Thread(target=self.start_work, args=(self.fib, list(funcs))) + t = Thread(target=self.start_work, args=(self.fib, list(funcs), barrier)) t.start() threads.append(t) self.after_threads() + barrier.wait() while True: any_alive = False @@ -73,6 +75,9 @@ def test_instrumentation(self): break self.during_threads() + # Sleep to avoid setting monitoring events too rapidly and + # overflowing the global version counter + time.sleep(0.0001) self.after_test() @@ -117,7 +122,6 @@ class MonitoringMultiThreaded( def setUp(self): super().setUp() self.set = False - self.called = False monitoring.register_callback( self.tool_id, monitoring.events.LINE, self.callback ) @@ -127,10 +131,7 @@ def tearDown(self): super().tearDown() def callback(self, *args): - self.called = True - - def after_test(self): - self.assertTrue(self.called) + pass def during_threads(self): if self.set: @@ -148,16 +149,11 @@ class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): sys.settrace(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): @@ -174,16 +170,11 @@ class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): sys.setprofile(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): @@ -200,16 +191,11 @@ class 
SetProfileAllThreadsMultiThreaded(InstrumentationMultiThreadedMixin, TestC def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): threading.setprofile_all_threads(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 10c3a622107714..ec5df4d20e7085 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -846,11 +846,15 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "candidates", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["candidates"], 0) + self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): self.addCleanup(gc.enable) @@ -861,11 +865,25 @@ def test_get_stats(self): self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"]) + self.assertGreater(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertEqual(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertGreaterEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertEqual(new[2][stat], old[2][stat]) gc.collect(2) - new = gc.get_stats() - self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) + old, new = new, gc.get_stats() + self.assertEqual(new[0]["collections"], old[0]["collections"]) 
self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"] + 1) + self.assertEqual(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertGreater(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertGreaterEqual(new[2][stat], old[2][stat]) def test_freeze(self): gc.freeze() @@ -1298,9 +1316,11 @@ def test_collect(self): # Check that we got the right info dict for all callbacks for v in self.visit: info = v[2] - self.assertTrue("generation" in info) - self.assertTrue("collected" in info) - self.assertTrue("uncollectable" in info) + self.assertIn("generation", info) + self.assertIn("collected", info) + self.assertIn("uncollectable", info) + self.assertIn("candidates", info) + self.assertIn("duration", info) def test_collect_generation(self): self.preclean() diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 33845d8a9e2651..489bb049d2fadb 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -40,12 +40,15 @@ openssl_hashlib = import_fresh_module('hashlib', fresh=['_hashlib']) try: - from _hashlib import HASH, HASHXOF, openssl_md_meth_names, get_fips_mode + import _hashlib except ImportError: - HASH = None - HASHXOF = None - openssl_md_meth_names = frozenset() - + _hashlib = None +# The extension module may exist but only define some of these. 
gh-141907 +HASH = getattr(_hashlib, 'HASH', None) +HASHXOF = getattr(_hashlib, 'HASHXOF', None) +openssl_md_meth_names = getattr(_hashlib, 'openssl_md_meth_names', frozenset()) +get_fips_mode = getattr(_hashlib, 'get_fips_mode', None) +if not get_fips_mode: def get_fips_mode(): return 0 @@ -631,9 +634,14 @@ def check_sha3(self, name, capacity, rate, suffix): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: m = hash_object_constructor() - if HASH is not None and isinstance(m, HASH): - # _hashopenssl's variant does not have extra SHA3 attributes - continue + if name.startswith('shake_'): + if HASHXOF is not None and isinstance(m, HASHXOF): + # _hashopenssl's variant does not have extra SHA3 attributes + continue + else: + if HASH is not None and isinstance(m, HASH): + # _hashopenssl's variant does not have extra SHA3 attributes + continue self.assertEqual(capacity + rate, 1600) self.assertEqual(m._capacity_bits, capacity) self.assertEqual(m._rate_bits, rate) @@ -1156,7 +1164,8 @@ def test_disallow_instantiation(self): def test_hash_disallow_instantiation(self): # internal types like _hashlib.HASH are not constructable support.check_disallow_instantiation(self, HASH) - support.check_disallow_instantiation(self, HASHXOF) + if HASHXOF is not None: + support.check_disallow_instantiation(self, HASHXOF) def test_readonly_types(self): for algorithm, constructors in self.constructors_to_test.items(): diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 19dde9362a43b6..e4eff1ea17a670 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -109,12 +109,13 @@ def get_events(self): class TestCaseBase(unittest.TestCase): - def get_collector(self): - return EventCollector(convert_charrefs=False) + def get_collector(self, convert_charrefs=False): + return EventCollector(convert_charrefs=convert_charrefs) - def _run_check(self, source, expected_events, collector=None): + def _run_check(self, 
source, expected_events, + *, collector=None, convert_charrefs=False): if collector is None: - collector = self.get_collector() + collector = self.get_collector(convert_charrefs=convert_charrefs) parser = collector for s in source: parser.feed(s) @@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None): def _run_check_extra(self, source, events): self._run_check(source, events, - EventCollectorExtra(convert_charrefs=False)) + collector=EventCollectorExtra(convert_charrefs=False)) class HTMLParserTestCase(TestCaseBase): @@ -187,10 +188,87 @@ def test_malformatted_charref(self): ]) def test_unclosed_entityref(self): - self._run_check("&entityref foo", [ - ("entityref", "entityref"), - ("data", " foo"), - ]) + self._run_check('> <', [('entityref', 'gt'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('> <', [('data', '> <')], convert_charrefs=True) + + self._run_check('&undefined <', + [('entityref', 'undefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('&undefined <', [('data', '&undefined <')], + convert_charrefs=True) + + self._run_check('>undefined <', + [('entityref', 'gtundefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('>undefined <', [('data', '>undefined <')], + convert_charrefs=True) + + self._run_check('& <', [('data', '& '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('& <', [('data', '& <')], convert_charrefs=True) + + def test_eof_in_entityref(self): + self._run_check('>', [('entityref', 'gt')], convert_charrefs=False) + self._run_check('>', [('data', '>')], convert_charrefs=True) + + self._run_check('&g', [('entityref', 'g')], convert_charrefs=False) + self._run_check('&g', [('data', '&g')], convert_charrefs=True) + + self._run_check('&undefined', [('entityref', 'undefined')], + convert_charrefs=False) + self._run_check('&undefined', [('data', '&undefined')], + convert_charrefs=True) + + 
self._run_check('>undefined', [('entityref', 'gtundefined')], + convert_charrefs=False) + self._run_check('>undefined', [('data', '>undefined')], + convert_charrefs=True) + + self._run_check('&', [('data', '&')], convert_charrefs=False) + self._run_check('&', [('data', '&')], convert_charrefs=True) + + def test_unclosed_charref(self): + self._run_check('{ <', [('charref', '123'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('{ <', [('data', '{ <')], convert_charrefs=True) + self._run_check('« <', [('charref', 'xab'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('« <', [('data', '\xab <')], convert_charrefs=True) + + self._run_check('� <', + [('charref', '123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + self._run_check('� <', + [('charref', 'x123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + + self._run_check('&# <', [('data', '&# '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&# <', [('data', '&# <')], convert_charrefs=True) + self._run_check('&#x <', [('data', '&#x '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&#x <', [('data', '&#x <')], convert_charrefs=True) + + def test_eof_in_charref(self): + self._run_check('{', [('charref', '123')], convert_charrefs=False) + self._run_check('{', [('data', '{')], convert_charrefs=True) + self._run_check('«', [('charref', 'xab')], convert_charrefs=False) + self._run_check('«', [('data', '\xab')], convert_charrefs=True) + + self._run_check('�', [('charref', '123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + self._run_check('�', [('charref', 'x123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + + 
self._run_check('&#', [('data', '&#')], convert_charrefs=False) + self._run_check('&#', [('data', '&#')], convert_charrefs=True) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=False) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=True) def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping @@ -762,20 +840,6 @@ def test_correct_detection_of_start_tags(self): ] self._run_check(html, expected) - def test_EOF_in_charref(self): - # see #17802 - # This test checks that the UnboundLocalError reported in the issue - # is not raised, however I'm not sure the returned values are correct. - # Maybe HTMLParser should use self.unescape for these - data = [ - ('a&', [('data', 'a&')]), - ('a&b', [('data', 'ab')]), - ('a&b ', [('data', 'a'), ('entityref', 'b'), ('data', ' ')]), - ('a&b;', [('data', 'a'), ('entityref', 'b')]), - ] - for html, expected in data: - self._run_check(html, expected) - def test_eof_in_comments(self): data = [ ('