From d078bd59c9fd75e7eb7832f02dda4829c819e996 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Thu, 14 Mar 2024 13:27:23 +1100 Subject: [PATCH 1/2] Add check for UUIDs when checking for existence of archives --- payu/metadata.py | 28 ++++++++++++++++------ test/test_metadata.py | 55 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/payu/metadata.py b/payu/metadata.py index c264c9a5..2337825c 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -157,7 +157,6 @@ def set_experiment_name(self, # Legacy experiment name and archive path legacy_name = self.control_path.name - legacy_archive_path = self.lab_archive_path / legacy_name if not self.enabled: # Metadata/UUID generation is disabled, so leave UUID out of @@ -167,18 +166,14 @@ def set_experiment_name(self, f"Experiment name used for archival: {self.experiment_name}") return - # Branch-UUID experiment name and archive path branch_uuid_experiment_name = self.new_experiment_name() - archive_path = self.lab_archive_path / branch_uuid_experiment_name - if is_new_experiment or archive_path.exists(): + if is_new_experiment or self.has_archive(branch_uuid_experiment_name): # Use branch-UUID aware experiment name self.experiment_name = branch_uuid_experiment_name - elif legacy_archive_path.exists(): + elif self.has_archive(legacy_name): # Use legacy CONTROL-DIR experiment name self.experiment_name = legacy_name - print(f"Pre-existing archive found at: {legacy_archive_path}. " - f"Experiment name will remain: {legacy_name}") elif keep_uuid: # Use same experiment UUID and use branch-UUID name for archive self.experiment_name = branch_uuid_experiment_name @@ -190,6 +185,25 @@ def set_experiment_name(self, ) self.set_new_uuid(is_new_experiment=True) + def has_archive(self, experiment_name: str) -> bool: + """Return True if archive under the experiment name exists and + if it exists, check for a non-matching UUID in archive metadata.""" + archive_path = self.lab_archive_path / experiment_name + + if archive_path.exists(): + # If a new UUID has not been generated, check if the + # UUID in the archive metadata matches UUID in metadata + archive_metadata_path = archive_path / METADATA_FILENAME + if not self.uuid_updated and archive_metadata_path.exists(): + archive_metadata = YAML().load(archive_metadata_path) + if (UUID_FIELD not in archive_metadata + or archive_metadata[UUID_FIELD] != self.uuid): + print("Mismatch of UUIDs between metadata and an archive " + f"metadata found at: {archive_metadata_path}") + return False + print(f"Found experiment archive: {archive_path}") + return archive_path.exists() + def set_new_uuid(self, is_new_experiment: bool = False) -> None: """Generate a new uuid and set experiment name""" self.uuid_updated = True diff --git a/test/test_metadata.py b/test/test_metadata.py index 49aa56dd..c0e777a4 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -261,6 +261,60 @@ def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment, assert metadata.uuid == expected_uuid +@pytest.mark.parametrize( + "new_uuid, archive_metadata_exists, archive_uuid, expected_result", + [ + # A legacy archive exists, but there's no corresponding metadata + ( + True, False, None, True + ), + # A legacy archive exists, there's a metadata with a UUID in + # control directory, but no metadata in archive + ( + False, False, None, True + ), + # Archive metadata exists but has no UUID + ( + False, True, None, False + ), + # Archive metadata exists with same UUID + ( + False, True, "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", True + ), + # Archive metadata exists with different UUID + ( + False, True, "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", False + ), + ] +) +def test_has_archive(new_uuid, archive_metadata_exists, archive_uuid, + expected_result): + # Setup config and metadata + write_config(config) + with cd(ctrldir): + metadata = Metadata(archive_dir) + metadata.uuid = "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136" + + if new_uuid: + metadata.uuid_updated = True + + # Setup archive and it's metadata file + archive_path = archive_dir / "ctrl" + archive_path.mkdir(parents=True) + + if archive_metadata_exists: + archive_metadata = {} + + if archive_uuid is not None: + archive_metadata["experiment_uuid"] = archive_uuid + + with open(archive_path / 'metadata.yaml', 'w') as file: + YAML().dump(archive_metadata, file) + + result = metadata.has_archive("ctrl") + assert result == expected_result + + def test_set_configured_experiment_name(): # Set experiment in config file test_config = copy.deepcopy(config) @@ -287,6 +341,7 @@ def test_set_configured_experiment_name(): ) def test_new_experiment_name(branch, expected_name): # Test configured experiment name is the set experiment name + write_config(config) with cd(ctrldir): metadata = Metadata(archive_dir) From 00964dd12f6ab445eb9290f67d12a430821d464c Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Wed, 3 Apr 2024 11:37:44 +1100 Subject: [PATCH 2/2] Simplify check for existing archives --- payu/metadata.py | 13 ++++++------- test/test_metadata.py | 22 +++++++--------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/payu/metadata.py b/payu/metadata.py index 2337825c..53cd66b6 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -155,7 +155,7 @@ def set_experiment_name(self, self.experiment_name) return - # Legacy experiment name and archive path + # Legacy experiment name legacy_name = self.control_path.name if not self.enabled: @@ -167,7 +167,6 @@ def set_experiment_name(self, return branch_uuid_experiment_name = self.new_experiment_name() - if is_new_experiment or self.has_archive(branch_uuid_experiment_name): # Use branch-UUID aware experiment name self.experiment_name = branch_uuid_experiment_name @@ -191,13 +190,13 @@ def has_archive(self, experiment_name: str) -> bool: archive_path = self.lab_archive_path / experiment_name if archive_path.exists(): - # If a new UUID has not been generated, check if the - # UUID in the archive metadata matches UUID in metadata + # Check if the UUID in the archive metadata matches the + # UUID in metadata archive_metadata_path = archive_path / METADATA_FILENAME - if not self.uuid_updated and archive_metadata_path.exists(): + if archive_metadata_path.exists(): archive_metadata = YAML().load(archive_metadata_path) - if (UUID_FIELD not in archive_metadata - or archive_metadata[UUID_FIELD] != self.uuid): + if (UUID_FIELD in archive_metadata and + archive_metadata[UUID_FIELD] != self.uuid): print("Mismatch of UUIDs between metadata and an archive " f"metadata found at: {archive_metadata_path}") return False diff --git a/test/test_metadata.py b/test/test_metadata.py index c0e777a4..29d28661 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -262,42 +262,34 @@ def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment, @pytest.mark.parametrize( - "new_uuid, archive_metadata_exists, archive_uuid, expected_result", + "archive_metadata_exists, archive_uuid, expected_result", [ # A legacy archive exists, but there's no corresponding metadata + # in archive ( - True, False, None, True - ), - # A legacy archive exists, there's a metadata with a UUID in - # control directory, but no metadata in archive - ( - False, False, None, True + False, None, True ), # Archive metadata exists but has no UUID ( - False, True, None, False + True, None, True ), # Archive metadata exists with same UUID ( - False, True, "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", True + True, "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", True ), # Archive metadata exists with different UUID ( - False, True, "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", False + True, "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", False ), ] ) -def test_has_archive(new_uuid, archive_metadata_exists, archive_uuid, - expected_result): +def test_has_archive(archive_metadata_exists, archive_uuid, expected_result): # Setup config and metadata write_config(config) with cd(ctrldir): metadata = Metadata(archive_dir) metadata.uuid = "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136" - if new_uuid: - metadata.uuid_updated = True - # Setup archive and it's metadata file archive_path = archive_dir / "ctrl" archive_path.mkdir(parents=True)