From 7e45c1fa6fd4473fd732a8689a100fe1f991bd3a Mon Sep 17 00:00:00 2001 From: Mel Hall <37735232+datamel@users.noreply.github.com> Date: Thu, 9 Feb 2023 13:16:00 +0000 Subject: [PATCH 1/7] Fix cylc clean while cat-log running --- cylc/flow/cfgspec/globalcfg.py | 3 ++- cylc/flow/pathutil.py | 21 +++++++++++++++++---- cylc/flow/scripts/clean.py | 9 ++++++--- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/cylc/flow/cfgspec/globalcfg.py b/cylc/flow/cfgspec/globalcfg.py index 17ca5d98ddd..669e4ec6464 100644 --- a/cylc/flow/cfgspec/globalcfg.py +++ b/cylc/flow/cfgspec/globalcfg.py @@ -1426,7 +1426,8 @@ def default_for( "[remote]retrieve job logs retry delays")} ''') Conf('tail command template', - VDR.V_STRING, 'tail -n +1 -F %(filename)s', desc=f''' + VDR.V_STRING, 'tail -n +1 --follow=name -F %(filename)s', + desc=f''' A command template (with ``%(filename)s`` substitution) to tail-follow job logs this platform, by ``cylc cat-log``. diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 0d62ef37ad4..8ad12582a33 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -19,12 +19,13 @@ from pathlib import Path import re from shutil import rmtree +from time import sleep from typing import Dict, Iterable, Set, Union, Optional, Any from cylc.flow import LOG from cylc.flow.cfgspec.glbl_cfg import glbl_cfg from cylc.flow.exceptions import ( - InputError, WorkflowFilesError, handle_rmtree_err + InputError, WorkflowFilesError ) from cylc.flow.platforms import get_localhost_install_target @@ -297,7 +298,7 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: "Removing symlink and its target directory: " f"{path} -> {target}" ) - rmtree(target, onerror=handle_rmtree_err) + _rmtree(target) else: LOG.info(f'Removing broken symlink: {path}') os.remove(path) @@ -305,7 +306,19 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: raise FileNotFoundError(path) else: LOG.info(f'Removing directory: {path}') - rmtree(path, onerror=handle_rmtree_err) + _rmtree(path) + + +def _rmtree(target, retries=5, sleep_time=0.5): + for _try_num in range(retries): + try: + rmtree(target) + return + except OSError as exc: + err = exc + sleep(sleep_time) + continue + raise err def remove_dir_or_file(path: Union[Path, str]) -> None: @@ -325,7 +338,7 @@ def remove_dir_or_file(path: Union[Path, str]) -> None: os.remove(path) else: LOG.info(f"Removing directory: {path}") - rmtree(path, onerror=handle_rmtree_err) + _rmtree(path) def remove_empty_parents( diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py index b53e9120692..a8f46f74477 100644 --- a/cylc/flow/scripts/clean.py +++ b/cylc/flow/scripts/clean.py @@ -193,15 +193,18 @@ async def run(*ids: str, opts: 'Values') -> None: if multi_mode and not opts.skip_interactive: prompt(workflows) # prompt for approval or exit - failed = [] + failed = {} for workflow in sorted(workflows): try: init_clean(workflow, opts) except Exception as exc: - failed.append(workflow) + failed[workflow] = exc LOG.warning(exc) if failed: - raise CylcError(f"Clean failed: {', '.join(failed)}") + msg = "Clean failed:" + for workflow, exc_message in failed.items(): + msg += f"\nWorkflow: {workflow}\nError: {exc_message}" + raise CylcError(msg) @cli_function(get_option_parser) From dd818f2bee614d9bb6ecf65d3277edd86bc41cc8 Mon Sep 17 00:00:00 2001 From: Mel Hall <37735232+datamel@users.noreply.github.com> Date: Thu, 16 Mar 2023 11:54:47 +0000 Subject: [PATCH 2/7] Update _rmtree --- cylc/flow/pathutil.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 8ad12582a33..e5f265f13d5 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -15,6 +15,7 @@ # along with this program. If not, see . """Functions to return paths to common workflow files and directories.""" +import errno import os from pathlib import Path import re @@ -25,7 +26,7 @@ from cylc.flow import LOG from cylc.flow.cfgspec.glbl_cfg import glbl_cfg from cylc.flow.exceptions import ( - InputError, WorkflowFilesError + FileRemovalError, InputError, WorkflowFilesError ) from cylc.flow.platforms import get_localhost_install_target @@ -309,16 +310,24 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: _rmtree(path) -def _rmtree(target, retries=5, sleep_time=0.5): +def _rmtree(target: Union[Path, str], + retries: int = 5, + sleep_time: float = 0.5): + """Make rmtree more robust to nfs issues. + + shutil.rmtree will be retried (default 5 times) + """ for _try_num in range(retries): try: rmtree(target) return except OSError as exc: - err = exc - sleep(sleep_time) - continue - raise err + if exc.errno == errno.ENOTEMPTY: + err = exc + sleep(sleep_time) + else: + raise + raise FileRemovalError(err) def remove_dir_or_file(path: Union[Path, str]) -> None: From ca03d23179032a70dc8656004fb6df09a75c83c4 Mon Sep 17 00:00:00 2001 From: Melanie Hall <37735232+datamel@users.noreply.github.com> Date: Fri, 17 Mar 2023 15:23:37 +0000 Subject: [PATCH 3/7] Update cylc/flow/scripts/clean.py Co-authored-by: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> --- cylc/flow/scripts/clean.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py index a8f46f74477..317ed77b192 100644 --- a/cylc/flow/scripts/clean.py +++ b/cylc/flow/scripts/clean.py @@ -199,7 +199,6 @@ async def run(*ids: str, opts: 'Values') -> None: init_clean(workflow, opts) except Exception as exc: failed[workflow] = exc - LOG.warning(exc) if failed: msg = "Clean failed:" for workflow, exc_message in failed.items(): From f054199326133098ee807c7260fe96c9a75acd84 Mon Sep 17 00:00:00 2001 From: Oliver Sanders Date: Wed, 22 Mar 2023 17:34:51 +0000 Subject: [PATCH 4/7] Apply suggestions from code review --- cylc/flow/pathutil.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index e5f265f13d5..bd1c7c08aa0 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -314,15 +314,29 @@ def _rmtree(target: Union[Path, str], retries: int = 5, sleep_time: float = 0.5): """Make rmtree more robust to nfs issues. - - shutil.rmtree will be retried (default 5 times) + + If a file is deleted which is being held open for reading by another process + NFS will create a ".nfs" file in the containing directory to handle this. + + If you try to delete the directory which contains these files you will get either + a ENOTEMPTY or EBUSY error. + + A likely cause of open file handles in cylc-run directories is `cylc cat-log -m t`. + If the file being cat-log'ged is removed, the command will fail on its next poll. + The default poll interval is one second, so if we wait a couple of seconds and + retry the removal it will likely work. + + This command retries removal a specified number of times at a specified + interval before failing to give cat-log process a chance to die gracefully and + release their filesystem locks. For more info see: + https://github.com/cylc/cylc-flow/pull/5359#issuecomment-1479989975 """ for _try_num in range(retries): try: rmtree(target) return except OSError as exc: - if exc.errno == errno.ENOTEMPTY: + if exc.errno in {errno.ENOTEMPTY, errno.EBUSY}: err = exc sleep(sleep_time) else: From eebc1023391a4297b7d39e6be7e621183e46e0c9 Mon Sep 17 00:00:00 2001 From: Oliver Sanders Date: Thu, 23 Mar 2023 14:50:44 +0000 Subject: [PATCH 5/7] Update cylc/flow/pathutil.py --- cylc/flow/pathutil.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index bd1c7c08aa0..3acde37004c 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -310,24 +310,29 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: _rmtree(path) -def _rmtree(target: Union[Path, str], - retries: int = 5, - sleep_time: float = 0.5): +def _rmtree( + target: Union[Path, str], + retries: int = 5, + sleep_time: float = 0.5, +): """Make rmtree more robust to nfs issues. - - If a file is deleted which is being held open for reading by another process - NFS will create a ".nfs" file in the containing directory to handle this. - - If you try to delete the directory which contains these files you will get either - a ENOTEMPTY or EBUSY error. - - A likely cause of open file handles in cylc-run directories is `cylc cat-log -m t`. - If the file being cat-log'ged is removed, the command will fail on its next poll. - The default poll interval is one second, so if we wait a couple of seconds and + + If a file is deleted which is being held open for reading by + another process. NFS will create a ".nfs" file in the + containing directory to handle this. + + If you try to delete the directory which contains these + files you will get either a ENOTEMPTY or EBUSY error. + + A likely cause of open file handles in cylc-run directories + is `cylc cat-log -m t`. If the file being cat-log'ged is removed, + the command will fail on its next poll. The default poll + interval is one second, so if we wait a couple of seconds and retry the removal it will likely work. - This command retries removal a specified number of times at a specified - interval before failing to give cat-log process a chance to die gracefully and + This command retries removal a specified number + of times at a specified interval before failing to + give cat-log process a chance to die gracefully and release their filesystem locks. For more info see: https://github.com/cylc/cylc-flow/pull/5359#issuecomment-1479989975 """ From df448ec4b25ea6fbabf70dd7bb9fa6d909d493be Mon Sep 17 00:00:00 2001 From: Oliver Sanders Date: Mon, 27 Mar 2023 15:19:59 +0100 Subject: [PATCH 6/7] Update cylc/flow/pathutil.py --- cylc/flow/pathutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 3acde37004c..c8b47e41b3d 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -312,8 +312,8 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: def _rmtree( target: Union[Path, str], - retries: int = 5, - sleep_time: float = 0.5, + retries: int = 10, + sleep_time: float = 1, ): """Make rmtree more robust to nfs issues. From 04efd7c26788d63b8dffeceedec2c8dd87b16d35 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:58:23 +0100 Subject: [PATCH 7/7] Update changelog --- CHANGES.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 47630ac289b..5410f3c2376 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,10 +23,14 @@ Fixes `cylc set-verbosity`. [#5394](https://github.com/cylc/cylc-flow/pull/5394) - Fixes a possible scheduler traceback observed with remote task polling. -[#5386](https://github.com/cylc/cylc-flow/pull/5386) Fix bug where +[#5386](https://github.com/cylc/cylc-flow/pull/5386) - Fix bug where absence of `job name length maximum` in PBS platform settings would cause Cylc to crash when preparing the job script. +[#5359](https://github.com/cylc/cylc-flow/pull/5359) - Fix bug where viewing +a workflow's log in the GUI or using `cylc cat-log` would prevent `cylc clean` +from working. + ------------------------------------------------------------------------------- ## __cylc-8.1.2 (Released 2023-02-20)__