From ebd5c5b48c3e56e08630d11f7ed4cc5b23b02ed9 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 11 Nov 2020 11:45:28 -0500 Subject: [PATCH] feat: way to only recompile changed files (#2643) * feat: lazy compile * refactor: lazy -> only_changed * refactor: leave the changed function up to the user * refactor: pass a function, based on @YannickJadoul and @HDembinski's suggestions * refactor: old -> _old, as it's not intended for users * docs: slight improvement from @rwgk * docs: Ccache spelling, extra warning about pip caching Ccache spelling noted by @YannickJadoul --- .pre-commit-config.yaml | 2 +- docs/compiling.rst | 30 ++++++++++++++++++ pybind11/setup_helpers.py | 62 ++++++++++++++++++++++++++++++-------- pybind11/setup_helpers.pyi | 17 +++++++++-- 4 files changed, 95 insertions(+), 16 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cf519b475..85254a8a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -87,7 +87,7 @@ repos: - id: disallow-caps name: Disallow improper capitalization language: pygrep - entry: PyBind|Numpy|Cmake + entry: PyBind|Numpy|Cmake|CCache exclude: .pre-commit-config.yaml - repo: local diff --git a/docs/compiling.rst b/docs/compiling.rst index 25b703e34..f26e6cf60 100644 --- a/docs/compiling.rst +++ b/docs/compiling.rst @@ -89,6 +89,36 @@ default number of threads (0 will take the number of threads available) and ``max=N``, the maximum number of threads; if you have a large extension you may want set this to a memory dependent number. +If you are developing rapidly and have a lot of C++ files, you may want to +avoid rebuilding files that have not changed. For simple cases where you are +using ``pip install -e .`` and do not have local headers, you can skip the +rebuild if an object file is newer than its source (headers are not checked!) +with the following: + +..
code-block:: python + + from pybind11.setup_helpers import ParallelCompile, naive_recompile + + ParallelCompile("NPY_NUM_BUILD_JOBS", needs_recompile=naive_recompile).install() + + +If you have a more complex build, you can implement a smarter function and pass +it to ``needs_recompile``, or you can use [Ccache]_ instead. ``CXX="ccache g++" +pip install -e .`` would be the way to use it with GCC, for example. Unlike the +simple solution, this works even when not compiling in editable mode, but +it does require Ccache to be installed. + +Keep in mind that Pip will not even attempt to rebuild if it thinks it has +already built a copy of your code, which it deduces from the version number. +One way to avoid this is to use [setuptools_scm]_, which will generate a +version number that includes the number of commits since your last tag and a +hash for a dirty directory. Another way to force a rebuild is to purge your cache +or use Pip's ``--no-cache-dir`` option. + +.. [Ccache] https://ccache.dev + +.. [setuptools_scm] https://github.com/pypa/setuptools_scm + .. _setup_helpers-pep518: PEP 518 requirements (Pip 10+ required) diff --git a/pybind11/setup_helpers.py b/pybind11/setup_helpers.py index 2dcbab4e0..33605ddfd 100644 --- a/pybind11/setup_helpers.py +++ b/pybind11/setup_helpers.py @@ -275,7 +275,8 @@ def auto_cpp_level(compiler): class build_ext(_build_ext): # noqa: N801 """ Customized build_ext that allows an auto-search for the highest supported - C++ level for Pybind11Extension. + C++ level for Pybind11Extension. This is only needed for the auto-search + for now, and is completely optional otherwise. """ def build_extensions(self): @@ -293,6 +294,23 @@ class build_ext(_build_ext): # noqa: N801 _build_ext.build_extensions(self) +def naive_recompile(obj, src): + """ + This will recompile only if the source file changes. It does not check + header files, so a more advanced function or Ccache is better if you have + editable header files in your package.
+ """ + return os.stat(obj).st_mtime < os.stat(src).st_mtime + + +def no_recompile(obg, src): + """ + This is the safest but slowest choice (and is the default) - will always + recompile sources. + """ + return True + + # Optional parallel compile utility # inspired by: http://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils # and: https://github.com/tbenthompson/cppimport/blob/stable/cppimport/build_module.py @@ -306,24 +324,42 @@ class ParallelCompile(object): This takes several arguments that allow you to customize the compile function created: - envvar: Set an environment variable to control the compilation threads, like NPY_NUM_BUILD_JOBS - default: 0 will automatically multithread, or 1 will only multithread if the envvar is set. - max: The limit for automatic multithreading if non-zero + envvar: + Set an environment variable to control the compilation threads, like + NPY_NUM_BUILD_JOBS + default: + 0 will automatically multithread, or 1 will only multithread if the + envvar is set. + max: + The limit for automatic multithreading if non-zero + needs_recompile: + A function of (obj, src) that returns True when recompile is needed. No + effect in isolated mode; use ccache instead, see + https://github.com/matplotlib/matplotlib/issues/1507/ + + To use:: - To use: ParallelCompile("NPY_NUM_BUILD_JOBS").install() - or: + + or:: + with ParallelCompile("NPY_NUM_BUILD_JOBS"): setup(...) + + By default, this assumes all files need to be recompiled. A smarter + function can be provided via needs_recompile. If the output has not yet + been generated, the compile will always run, and this function is not + called. 
""" - __slots__ = ("envvar", "default", "max", "old") + __slots__ = ("envvar", "default", "max", "_old", "needs_recompile") - def __init__(self, envvar=None, default=0, max=0): + def __init__(self, envvar=None, default=0, max=0, needs_recompile=no_recompile): self.envvar = envvar self.default = default self.max = max - self.old = [] + self.needs_recompile = needs_recompile + self._old = [] def function(self): """ @@ -360,7 +396,9 @@ class ParallelCompile(object): src, ext = build[obj] except KeyError: return - compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + + if not os.path.exists(obj) or self.needs_recompile(obj, src): + compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) try: import multiprocessing @@ -391,8 +429,8 @@ class ParallelCompile(object): return self def __enter__(self): - self.old.append(distutils.ccompiler.CCompiler.compile) + self._old.append(distutils.ccompiler.CCompiler.compile) return self.install() def __exit__(self, *args): - distutils.ccompiler.CCompiler.compile = self.old.pop() + distutils.ccompiler.CCompiler.compile = self._old.pop() diff --git a/pybind11/setup_helpers.pyi b/pybind11/setup_helpers.pyi index 19feb8b35..23232e1fd 100644 --- a/pybind11/setup_helpers.pyi +++ b/pybind11/setup_helpers.pyi @@ -1,7 +1,7 @@ # IMPORTANT: Should stay in sync with setup_helpers.py (mostly checked by CI / # pre-commit). -from typing import Any, Iterator, Optional, Type, TypeVar, Union +from typing import Any, Callable, Iterator, Optional, Type, TypeVar, Union from types import TracebackType from distutils.command.build_ext import build_ext as _build_ext # type: ignore @@ -33,12 +33,23 @@ def auto_cpp_level(compiler: distutils.ccompiler.CCompiler) -> Union[int, str]: class build_ext(_build_ext): # type: ignore def build_extensions(self) -> None: ... +def no_recompile(obj: str, src: str) -> bool: ... +def naive_recompile(obj: str, src: str) -> bool: ... 
+ T = TypeVar("T", bound="ParallelCompile") class ParallelCompile: + envvar: Optional[str] + default: int + max: int + needs_recompile: Callable[[str, str], bool] def __init__( - self, envvar: Optional[str] = None, default: int = 0, max: int = 0 - ): ... + self, + envvar: Optional[str] = None, + default: int = 0, + max: int = 0, + needs_recompile: Callable[[str, str], bool] = no_recompile, + ) -> None: ... def function(self) -> Any: ... def install(self: T) -> T: ... def __enter__(self: T) -> T: ...