diff --git a/CHANGES.md b/CHANGES.md index 47630ac289b..5410f3c2376 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,10 +23,14 @@ Fixes `cylc set-verbosity`. [#5394](https://github.com/cylc/cylc-flow/pull/5394) - Fixes a possible scheduler traceback observed with remote task polling. -[#5386](https://github.com/cylc/cylc-flow/pull/5386) Fix bug where +[#5386](https://github.com/cylc/cylc-flow/pull/5386) - Fix bug where absence of `job name length maximum` in PBS platform settings would cause Cylc to crash when preparing the job script. +[#5359](https://github.com/cylc/cylc-flow/pull/5359) - Fix bug where viewing +a workflow's log in the GUI or using `cylc cat-log` would prevent `cylc clean` +from working. + ------------------------------------------------------------------------------- ## __cylc-8.1.2 (Released 2023-02-20)__ diff --git a/cylc/flow/cfgspec/globalcfg.py b/cylc/flow/cfgspec/globalcfg.py index 17ca5d98ddd..669e4ec6464 100644 --- a/cylc/flow/cfgspec/globalcfg.py +++ b/cylc/flow/cfgspec/globalcfg.py @@ -1426,7 +1426,8 @@ def default_for( "[remote]retrieve job logs retry delays")} ''') Conf('tail command template', - VDR.V_STRING, 'tail -n +1 -F %(filename)s', desc=f''' + VDR.V_STRING, 'tail -n +1 --follow=name -F %(filename)s', + desc=f''' A command template (with ``%(filename)s`` substitution) to tail-follow job logs this platform, by ``cylc cat-log``. diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 52066bc4a52..c08b638e1f4 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -15,16 +15,18 @@ # along with this program. If not, see . 
"""Functions to return paths to common workflow files and directories.""" +import errno import os from pathlib import Path import re from shutil import rmtree +from time import sleep from typing import Dict, Iterable, Set, Union, Optional, Any from cylc.flow import LOG from cylc.flow.cfgspec.glbl_cfg import glbl_cfg from cylc.flow.exceptions import ( - InputError, WorkflowFilesError, handle_rmtree_err + FileRemovalError, InputError, WorkflowFilesError ) from cylc.flow.platforms import get_localhost_install_target @@ -297,7 +299,7 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: "Removing symlink and its target directory: " f"{path} -> {target}" ) - rmtree(target, onerror=handle_rmtree_err) + _rmtree(target) else: LOG.info(f'Removing broken symlink: {path}') os.remove(path) @@ -305,7 +307,46 @@ def remove_dir_and_target(path: Union[Path, str]) -> None: raise FileNotFoundError(path) else: LOG.info(f'Removing directory: {path}') - rmtree(path, onerror=handle_rmtree_err) + _rmtree(path) + + +def _rmtree( + target: Union[Path, str], + retries: int = 10, + sleep_time: float = 1, +): + """Make rmtree more robust to NFS issues. + + If a file is deleted which is being held open for reading by + another process, NFS will create a ".nfs" file in the + containing directory to handle this. + + If you try to delete the directory which contains these + files you will get either an ENOTEMPTY or EBUSY error. + + A likely cause of open file handles in cylc-run directories + is `cylc cat-log -m t`. If the file being cat-log'ged is removed, + the command will fail on its next poll. The default poll + interval is one second, so if we wait a couple of seconds and + retry the removal it will likely work. + + This function retries removal a specified number + of times at a specified interval before failing to + give cat-log processes a chance to die gracefully and + release their filesystem locks. 
For more info see: + https://github.com/cylc/cylc-flow/pull/5359#issuecomment-1479989975 + """ + for _try_num in range(retries): + try: + rmtree(target) + return + except OSError as exc: + if exc.errno in {errno.ENOTEMPTY, errno.EBUSY}: + err = exc + sleep(sleep_time) + else: + raise + raise FileRemovalError(err) def remove_dir_or_file(path: Union[Path, str]) -> None: @@ -325,7 +366,7 @@ def remove_dir_or_file(path: Union[Path, str]) -> None: os.remove(path) else: LOG.info(f"Removing directory: {path}") - rmtree(path, onerror=handle_rmtree_err) + _rmtree(path) def remove_empty_parents( diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py index b53e9120692..317ed77b192 100644 --- a/cylc/flow/scripts/clean.py +++ b/cylc/flow/scripts/clean.py @@ -193,15 +193,17 @@ async def run(*ids: str, opts: 'Values') -> None: if multi_mode and not opts.skip_interactive: prompt(workflows) # prompt for approval or exit - failed = [] + failed = {} for workflow in sorted(workflows): try: init_clean(workflow, opts) except Exception as exc: - failed.append(workflow) - LOG.warning(exc) + failed[workflow] = exc if failed: - raise CylcError(f"Clean failed: {', '.join(failed)}") + msg = "Clean failed:" + for workflow, exc_message in failed.items(): + msg += f"\nWorkflow: {workflow}\nError: {exc_message}" + raise CylcError(msg) @cli_function(get_option_parser)