Skip to content

Commit

Permalink
Federated s3 fixture (GSI-965) (#126)
Browse files Browse the repository at this point in the history
* Add multi-node S3 storage fixtures

* Bump version to 3.6.0

* Compare sets with sets
  • Loading branch information
TheByronHimes authored Sep 3, 2024
1 parent 8b4817a commit 2c1cca7
Show file tree
Hide file tree
Showing 5 changed files with 252 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .pyproject_generation/pyproject_custom.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "hexkit"
version = "3.5.1"
version = "3.6.0"
description = "A Toolkit for Building Microservices using the Hexagonal Architecture"
requires-python = ">=3.9"
classifiers = [
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ classifiers = [
"Intended Audience :: Developers",
]
name = "hexkit"
version = "3.5.1"
version = "3.6.0"
description = "A Toolkit for Building Microservices using the Hexagonal Architecture"
dependencies = [
"pydantic >=2, <3",
Expand Down
18 changes: 18 additions & 0 deletions src/hexkit/providers/s3/testutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,27 @@
MEBIBYTE,
TEST_FILE_DIR,
TEST_FILE_PATHS,
FederatedS3Fixture,
FileObject,
S3ContainerFixture,
S3Fixture,
S3MultiContainerFixture,
calc_md5,
clean_federated_s3_fixture,
clean_s3_fixture,
federated_s3_fixture,
get_clean_federated_s3_fixture,
get_clean_s3_fixture,
get_persistent_federated_s3_fixture,
get_persistent_s3_fixture,
get_s3_container_fixture,
get_s3_multi_container_fixture,
persistent_federated_s3_fixture,
persistent_s3_fixture,
populate_storage,
s3_container_fixture,
s3_fixture,
s3_multi_container_fixture,
temp_file_object,
tmp_file,
upload_file,
Expand Down Expand Up @@ -73,4 +82,13 @@
"upload_part",
"upload_part_of_size",
"upload_part_via_url",
"FederatedS3Fixture",
"S3MultiContainerFixture",
"clean_federated_s3_fixture",
"federated_s3_fixture",
"get_clean_federated_s3_fixture",
"get_s3_multi_container_fixture",
"get_persistent_federated_s3_fixture",
"s3_multi_container_fixture",
"persistent_federated_s3_fixture",
]
148 changes: 148 additions & 0 deletions src/hexkit/providers/s3/testutils/_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,151 @@ def tmp_file() -> Generator[FileObject, None, None]:
"""A fixture that provides a temporary file."""
with temp_file_object() as temp_file:
yield temp_file


class FederatedS3Fixture:
    """Fixture containing multiple S3 fixtures to simulate federated storage.

    Attributes:
        storages: Mapping from storage alias to the S3Fixture used for that alias.
    """

    def __init__(self, storages: dict[str, S3Fixture]):
        self.storages = storages

    def get_configs_by_alias(self) -> dict[str, S3Config]:
        """Get the S3Config instance for each object storage in the fixture.

        Returns:
            A dictionary mapping each storage alias to the `config` attribute of
            the corresponding S3Fixture.
        """
        return {alias: storage.config for alias, storage in self.storages.items()}

    async def populate_dummy_items(
        self, alias: str, contents: dict[str, list[str]]
    ) -> None:
        """Convenience function to populate a specific S3Fixture.

        Args:
        - `alias`: The alias of the S3Fixture to populate.
        - `contents`: A dictionary with bucket names as keys and lists of object names
            as values. The buckets can be empty, and the objects are created with a
            size of 1 byte.

        Raises:
        - `RuntimeError`: If `alias` is not one of the aliases in this fixture.
        """
        if alias not in self.storages:
            # This would indicate some kind of mismatch between config and fixture
            raise RuntimeError(f"Alias '{alias}' not found in the federated S3 fixture")
        storage = self.storages[alias]

        # Populate the buckets so even empty buckets are established
        await storage.populate_buckets(list(contents))

        # Add the dummy items. Each temporary file is uploaded while its context
        # manager is still open, i.e. before the temporary file is released again.
        for bucket_id, object_ids in contents.items():
            for object_id in object_ids:
                with temp_file_object(bucket_id, object_id, 1) as file:
                    await storage.populate_file_objects([file])


class S3MultiContainerFixture:
    """Fixture for managing multiple running S3 test containers in order to mimic
    multiple object storages.

    Without this fixture, separate S3Fixture instances would access the same
    underlying storage resources.

    Attributes:
        s3_containers: Mapping from storage alias to the container fixture that
            backs this alias.
    """

    def __init__(self, s3_containers: dict[str, S3ContainerFixture]):
        self.s3_containers = s3_containers

    def __enter__(self):
        """Enter the context manager and start the S3 containers.

        If entering one of the containers fails, all containers that were already
        entered are exited again before the error propagates, so that no container
        is left running.
        """
        from contextlib import ExitStack

        with ExitStack() as stack:
            for container in self.s3_containers.values():
                stack.enter_context(container)
            # Every container was entered successfully: detach the registered
            # callbacks so that leaving this block does not exit the containers.
            # Cleanup is the responsibility of `__exit__` from here on.
            stack.pop_all()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit the context manager and clean up the S3 containers.

        Every container is exited even if exiting another one raises; in that case
        the error is re-raised once all containers have been processed.
        """
        from contextlib import ExitStack

        stack = ExitStack()
        # ExitStack runs its callbacks in LIFO order, so the containers are pushed
        # in reverse to keep exiting them in their original (insertion) order.
        for container in reversed(list(self.s3_containers.values())):
            stack.push(container)
        # The result is deliberately not returned: as before, this context manager
        # never suppresses an exception raised inside the `with` block.
        stack.__exit__(exc_type, exc_val, exc_tb)


def _s3_multi_container_fixture(
    request,
) -> Generator[S3MultiContainerFixture, None, None]:
    """Yield an entered S3MultiContainerFixture with one container per storage alias.

    The aliases are obtained from a 'storage_aliases' fixture, which has to be
    supplied by the test setup. For every alias, an S3ContainerFixture named
    '<alias>_s3_container' is created.

    Raises:
        NotImplementedError: If the 'storage_aliases' fixture cannot be looked up.
        RuntimeError: If the 'storage_aliases' fixture returns an empty value.
    """
    try:
        aliases = request.getfixturevalue("storage_aliases")
    except pytest.FixtureLookupError as lookup_error:
        message = (
            "You must provide a 'storage_aliases' fixture in your test setup."
            " It must have the same scope as 's3_multi_container'"
            " and it must return a list of the storage aliases to be used."
        )
        raise NotImplementedError(message) from lookup_error

    if not aliases:
        raise RuntimeError("The 'storage_aliases' list must not be empty.")

    containers: dict[str, S3ContainerFixture] = {}
    for alias in aliases:
        containers[alias] = S3ContainerFixture(name=f"{alias}_s3_container")

    # The containers are entered here and exited once the generator is finalized.
    with S3MultiContainerFixture(containers) as multi_container:
        yield multi_container


def get_s3_multi_container_fixture(
    scope: PytestScope = "session", name: str = "s3_multi_container"
):
    """Create a pytest fixture that provides multiple LocalStack test containers.

    The fixture is session-scoped unless another scope is passed. A fixture named
    'storage_aliases' must be available in the test setup, since the wrapped
    fixture function looks it up.
    """
    as_fixture = pytest.fixture(scope=scope, name=name)
    return as_fixture(_s3_multi_container_fixture)


# Ready-made multi-container fixture using the defaults of the factory above:
# session scope, registered under the fixture name "s3_multi_container".
s3_multi_container_fixture = get_s3_multi_container_fixture()


def _persistent_federated_s3_fixture(
    s3_multi_container: S3MultiContainerFixture,
) -> Generator[FederatedS3Fixture, None, None]:
    """Yield a FederatedS3Fixture with one S3Fixture per container alias.

    Nothing is deleted here, neither before nor after yielding, so the state of
    the underlying S3 storages persists.
    """

    def _build_s3_fixture(container: S3ContainerFixture) -> S3Fixture:
        """Create the S3Fixture (config plus storage) for a single container."""
        s3_config = container.s3_config
        return S3Fixture(
            config=s3_config, storage=S3ObjectStorage(config=s3_config)
        )

    yield FederatedS3Fixture(
        {
            alias: _build_s3_fixture(container)
            for alias, container in s3_multi_container.s3_containers.items()
        }
    )


def get_persistent_federated_s3_fixture(
    scope: PytestScope = "function", name: str = "federated_s3"
):
    """Create a persistent federated S3 storage fixture with the given scope and name.

    The resulting fixture does not clean up the state of the S3 storages.
    """
    as_fixture = pytest.fixture(scope=scope, name=name)
    return as_fixture(_persistent_federated_s3_fixture)


# Ready-made persistent fixture using the factory defaults: function scope,
# registered under the fixture name "federated_s3".
persistent_federated_s3_fixture = get_persistent_federated_s3_fixture()


async def _clean_federated_s3_fixture(
    s3_multi_container: S3MultiContainerFixture,
) -> AsyncGenerator[FederatedS3Fixture, None]:
    """Yield a FederatedS3Fixture whose storages have been emptied beforehand.

    The fixture is obtained from the persistent fixture function; `delete_buckets`
    is then awaited on each contained S3Fixture before the fixture is yielded.
    """
    for federated in _persistent_federated_s3_fixture(s3_multi_container):
        storages = federated.storages
        for alias in storages:
            await storages[alias].delete_buckets()
        yield federated


def get_clean_federated_s3_fixture(
    scope: PytestScope = "function", name: str = "federated_s3"
):
    """Get a clean federated S3 storage fixture with desired scope.

    The state of each S3 storage is cleaned up (all buckets are deleted) before
    the fixture is provided to the test.

    Args:
        scope: The pytest scope of the created fixture.
        name: The name under which the fixture is registered.
    """
    return pytest_asyncio.fixture(_clean_federated_s3_fixture, scope=scope, name=name)


# Ready-made clean fixture using the factory defaults: function scope, registered
# under the fixture name "federated_s3". Both module-level names are bound to the
# very same fixture object.
federated_s3_fixture = clean_federated_s3_fixture = get_clean_federated_s3_fixture()
84 changes: 84 additions & 0 deletions tests/unit/test_federated_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the FederatedS3Fixture and related items."""

import pytest

from hexkit.providers.s3 import S3Config
from hexkit.providers.s3.testutils import ( # noqa: F401
FederatedS3Fixture,
federated_s3_fixture,
s3_multi_container_fixture,
)
from hexkit.providers.s3.testutils._fixtures import S3MultiContainerFixture

# Apply the asyncio marker to every test in this module.
pytestmark = pytest.mark.asyncio()

# Aliases of the two storages used throughout the tests in this module.
PRIMARY_STORAGE_ALIAS = "primary"
SECONDARY_STORAGE_ALIAS = "secondary"
STORAGE_ALIASES = [PRIMARY_STORAGE_ALIAS, SECONDARY_STORAGE_ALIAS]


@pytest.fixture(scope="session")
def storage_aliases():
    """Return the storage aliases for the federated S3 fixture.

    The module-level STORAGE_ALIASES list itself is returned, not a copy.
    """
    return STORAGE_ALIASES


async def test_get_configs_by_alias(federated_s3: FederatedS3Fixture):
    """Check that `get_configs_by_alias` yields one S3Config per storage alias."""
    configs_by_alias = federated_s3.get_configs_by_alias()
    assert set(configs_by_alias.keys()) == set(STORAGE_ALIASES)
    assert all(isinstance(cfg, S3Config) for cfg in configs_by_alias.values())


async def test_populate_dummy_items(federated_s3: FederatedS3Fixture):
    """Check that `populate_dummy_items` only fills the storage it was asked to fill."""
    # One bucket with two objects plus one bucket without any objects
    contents = {"bucket1": ["object1", "object2"], "empty": []}

    await federated_s3.populate_dummy_items(PRIMARY_STORAGE_ALIAS, contents)

    # The primary storage must contain the object as well as the empty bucket
    primary = federated_s3.storages[PRIMARY_STORAGE_ALIAS].storage
    assert await primary.does_object_exist(bucket_id="bucket1", object_id="object1")
    assert await primary.does_bucket_exist(bucket_id="empty")

    # The secondary storage must contain neither of them
    secondary = federated_s3.storages[SECONDARY_STORAGE_ALIAS].storage
    object_in_secondary = await secondary.does_object_exist(
        bucket_id="bucket1", object_id="object1"
    )
    assert not object_in_secondary
    bucket_in_secondary = await secondary.does_bucket_exist(bucket_id="empty")
    assert not bucket_in_secondary


async def test_multi_container_fixture(
    s3_multi_container: S3MultiContainerFixture,
):
    """Check that each alias has its own container with a distinct S3 config."""
    containers = s3_multi_container.s3_containers
    assert set(containers) == set(STORAGE_ALIASES)

    primary_container = containers[PRIMARY_STORAGE_ALIAS]
    secondary_container = containers[SECONDARY_STORAGE_ALIAS]
    assert primary_container.s3_config != secondary_container.s3_config

0 comments on commit 2c1cca7

Please sign in to comment.