Skip to content

Commit

Permalink
Cache git dependencies as wheels
Browse files Browse the repository at this point in the history
Currently, poetry install will clone, build and install every git
dependency that is not present in the environment. This is OK for
developers' machines, but not OK for CI - there the environment is always
fresh, and installing git dependencies takes significant time on each CI
run, especially if the dependency has C extensions that need to be
built.

This commit builds a wheel for every git dependency that has a precise
reference hash in the lock file and is not required to be in editable mode,
stores that wheel in a cache dir and installs from it instead of
cloning the repository again.
  • Loading branch information
maksbotan committed Oct 26, 2022
1 parent 4a07b5e commit 7f089ac
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 10 deletions.
16 changes: 14 additions & 2 deletions src/poetry/installation/chef.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ def get_cached_archives_for_link(self, link: Link) -> list[Path]:

return paths

def get_cache_directory_for_git(self, url: str, revision: str) -> Path:
    """Return the cache directory used for a git dependency.

    The location is derived from the repository URL and the revision only:
    both are serialized into a canonical, ASCII-only JSON document (sorted
    keys, compact separators) whose bytes are passed to
    ``_get_directory_from_hash``.

    :param url: URL of the git repository.
    :param revision: Revision (e.g. commit hash) of the dependency.
    :return: Whatever ``_get_directory_from_hash`` returns for the key.
    """
    canonical_key = json.dumps(
        {"url": url, "revision": revision},
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=True,
    )

    return self._get_directory_from_hash(canonical_key.encode("ascii"))

def get_cache_directory_for_link(self, link: Link) -> Path:
key_parts = {"url": link.url_without_fragment}

Expand All @@ -77,11 +86,14 @@ def get_cache_directory_for_link(self, link: Link) -> Path:
self._env.marker_env["interpreter_version"].split(".")[:2]
)

key = hashlib.sha256(
return self._get_directory_from_hash(
json.dumps(
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
).encode("ascii")
).hexdigest()
)

def _get_directory_from_hash(self, key_string: bytes) -> Path:
key = hashlib.sha256(key_string).hexdigest()

split_key = [key[:2], key[2:4], key[4:6], key[6:]]

Expand Down
48 changes: 40 additions & 8 deletions src/poetry/installation/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from poetry.core.packages.utils.link import Link
from poetry.core.pyproject.toml import PyProjectTOML

from poetry.exceptions import PoetryException
from poetry.installation.chef import Chef
from poetry.installation.chooser import Chooser
from poetry.installation.operations import Install
Expand All @@ -30,6 +31,7 @@
from poetry.utils.helpers import atomic_open
from poetry.utils.helpers import pluralize
from poetry.utils.helpers import remove_directory
from poetry.utils.pip import pip_build_wheel
from poetry.utils.pip import pip_install


Expand Down Expand Up @@ -599,25 +601,55 @@ def _install_git(self, operation: Install | Update) -> int:
package = operation.package
operation_message = self.get_operation_message(operation)

assert package.source_url is not None

reference = package.source_resolved_reference or package.source_reference
# Do not cache git repos when we don't know exact commit reference, otherwise
# cache may become stale.
# Always fallback to directory installation for editable dependencies.
if reference is not None and not package.develop:
cache_directory = self._chef.get_cache_directory_for_git(
package.source_url, reference
)
wheel = next(cache_directory.glob("*.whl"), None)
else:
cache_directory = None
wheel = None

if wheel is not None:
message = (
f" <fg=blue;options=bold>•</> {operation_message}:"
" <info>Installing...</info>"
)
self._write(operation, message)
return self.pip_install(wheel, upgrade=True)

message = (
f" <fg=blue;options=bold>•</> {operation_message}: <info>Cloning...</info>"
)
self._write(operation, message)

assert package.source_url is not None
source = Git.clone(
url=package.source_url,
source_root=self._env.path / "src",
revision=package.source_resolved_reference or package.source_reference,
revision=reference,
)

# Now we just need to install from the source directory
original_url = package.source_url
package._source_url = str(source.path)

status_code = self._install_directory(operation)
if cache_directory is not None:
pip_build_wheel(Path(source.path), cache_directory, self._env)
wheel = next(cache_directory.glob("*.whl"), None)
if wheel is None:
raise PoetryException(
f"Wheel file for {package.name} not found after building"
)

package._source_url = original_url
status_code = self.pip_install(wheel, upgrade=True)
else:
# Now we just need to install from the source directory
original_url = package.source_url
package._source_url = str(source.path)
status_code = self._install_directory(operation)
package._source_url = original_url

return status_code

Expand Down
30 changes: 30 additions & 0 deletions src/poetry/utils/pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,33 @@ def pip_install(
return environment.run_pip(*args)
except EnvCommandError as e:
raise PoetryException(f"Failed to install {path.as_posix()}") from e


def pip_build_wheel(
    path: Path,
    wheel_directory: Path,
    environment: Env,
    deps: bool = False,
    upgrade: bool = False,
) -> int | str:
    """Build a wheel for the project at *path* using ``pip wheel``.

    :param path: Source directory (or archive) to build the wheel from.
    :param wheel_directory: Directory the built wheel is written to
        (passed as ``--wheel-dir``).
    :param environment: Environment whose ``run_pip`` is used to invoke pip.
    :param deps: When false (the default), ``--no-deps`` is passed so that
        wheels are not built for the project's dependencies.
    :param upgrade: Accepted for backward compatibility only; it has no
        effect (see the note in the body).
    :return: The result of ``environment.run_pip``.
    :raises PoetryException: If the pip invocation fails.
    """
    args = [
        "wheel",
        "--disable-pip-version-check",
        "--isolated",
        "--no-input",
        "--wheel-dir",
        str(wheel_directory),
    ]

    # NOTE: ``upgrade`` is deliberately not forwarded. ``pip wheel`` has no
    # ``--upgrade`` option, so appending it made every call with
    # ``upgrade=True`` fail with "no such option".

    if not deps:
        args.append("--no-deps")

    args.append(str(path))

    try:
        return environment.run_pip(*args)
    except EnvCommandError as e:
        raise PoetryException(f"Failed to build wheel for {path.as_posix()}") from e
2 changes: 2 additions & 0 deletions tests/installation/test_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,8 @@ def test_executor_should_write_pep610_url_references_for_git_with_subdirectories
source_resolved_reference="123456",
source_url="https://github.com/demo/subdirectories.git",
source_subdirectory="two",
# Bypass wheel building logic - does not work in test environment
develop=True,
)

executor = Executor(tmp_venv, pool, config, io)
Expand Down

0 comments on commit 7f089ac

Please sign in to comment.