Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add check for UUIDs when checking for existence of archives #425

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions payu/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,8 @@ def set_experiment_name(self,
self.experiment_name)
return

# Legacy experiment name and archive path
# Legacy experiment name
legacy_name = self.control_path.name
legacy_archive_path = self.lab_archive_path / legacy_name

if not self.enabled:
# Metadata/UUID generation is disabled, so leave UUID out of
Expand All @@ -167,18 +166,13 @@ def set_experiment_name(self,
f"Experiment name used for archival: {self.experiment_name}")
return

# Branch-UUID experiment name and archive path
branch_uuid_experiment_name = self.new_experiment_name()
archive_path = self.lab_archive_path / branch_uuid_experiment_name

if is_new_experiment or archive_path.exists():
if is_new_experiment or self.has_archive(branch_uuid_experiment_name):
# Use branch-UUID aware experiment name
self.experiment_name = branch_uuid_experiment_name
elif legacy_archive_path.exists():
elif self.has_archive(legacy_name):
# Use legacy CONTROL-DIR experiment name
self.experiment_name = legacy_name
print(f"Pre-existing archive found at: {legacy_archive_path}. "
f"Experiment name will remain: {legacy_name}")
elif keep_uuid:
# Use same experiment UUID and use branch-UUID name for archive
self.experiment_name = branch_uuid_experiment_name
Expand All @@ -190,6 +184,25 @@ def set_experiment_name(self,
)
self.set_new_uuid(is_new_experiment=True)

def has_archive(self, experiment_name: str) -> bool:
    """Return True if a matching archive exists for the experiment name.

    The archive is looked up at ``self.lab_archive_path / experiment_name``.
    If that directory holds a metadata file whose UUID field differs from
    ``self.uuid``, the archive is treated as a mismatch and False is
    returned. An archive with no metadata file, with an empty or
    non-mapping metadata file, or with metadata lacking a UUID field
    counts as a match.
    """
    archive_path = self.lab_archive_path / experiment_name
    if not archive_path.exists():
        return False

    # Check if the UUID in the archive metadata matches the UUID in metadata
    archive_metadata_path = archive_path / METADATA_FILENAME
    if archive_metadata_path.exists():
        archive_metadata = YAML().load(archive_metadata_path)
        # An empty YAML document loads as None, so only inspect mappings;
        # anything else is handled like metadata without a UUID field
        if (isinstance(archive_metadata, dict)
                and UUID_FIELD in archive_metadata
                and archive_metadata[UUID_FIELD] != self.uuid):
            print("Mismatch of UUIDs between metadata and an archive "
                  f"metadata found at: {archive_metadata_path}")
            return False

    print(f"Found experiment archive: {archive_path}")
    return True

def set_new_uuid(self, is_new_experiment: bool = False) -> None:
"""Generate a new uuid and set experiment name"""
self.uuid_updated = True
Expand Down
47 changes: 47 additions & 0 deletions test/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,52 @@ def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment,
assert metadata.uuid == expected_uuid


@pytest.mark.parametrize(
    "archive_metadata_exists, archive_uuid, expected_result",
    [
        # Legacy archive is present but holds no metadata file
        (False, None, True),
        # Metadata file is present but carries no UUID
        (True, None, True),
        # Metadata file is present and its UUID equals the experiment's
        (True, "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", True),
        # Metadata file is present but its UUID differs
        (True, "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", False),
    ]
)
def test_has_archive(archive_metadata_exists, archive_uuid, expected_result):
    """Check has_archive against archives with and without metadata UUIDs."""
    # Build the metadata object from the control directory and pin its UUID
    write_config(config)
    with cd(ctrldir):
        metadata = Metadata(archive_dir)
    metadata.uuid = "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136"

    # Create the archive directory, then optionally its metadata file
    experiment_archive = archive_dir / "ctrl"
    experiment_archive.mkdir(parents=True)

    if archive_metadata_exists:
        contents = {}
        if archive_uuid is not None:
            contents["experiment_uuid"] = archive_uuid

        with open(experiment_archive / 'metadata.yaml', 'w') as file:
            YAML().dump(contents, file)

    assert metadata.has_archive("ctrl") == expected_result


def test_set_configured_experiment_name():
# Set experiment in config file
test_config = copy.deepcopy(config)
Expand All @@ -287,6 +333,7 @@ def test_set_configured_experiment_name():
)
def test_new_experiment_name(branch, expected_name):
# Test configured experiment name is the set experiment name
write_config(config)
with cd(ctrldir):
metadata = Metadata(archive_dir)

Expand Down
Loading