[feat] Add a perflog entry also when sanity fails #3189

Merged
merged 4 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
docs/config_reference.rst (4 additions, 0 deletions)

@@ -1112,6 +1112,7 @@ All logging handlers share the following set of common attributes:
  ``%(check_executable)s``, The value of the :attr:`~reframe.core.pipeline.RegressionTest.executable` attribute.
  ``%(check_executable_opts)s``, The value of the :attr:`~reframe.core.pipeline.RegressionTest.executable_opts` attribute.
  ``%(check_extra_resources)s``, The value of the :attr:`~reframe.core.pipeline.RegressionTest.extra_resources` attribute.
+ ``%(check_fail_reason)s``, The failure reason if the test has failed.
  ``%(check_hashcode)s``, The unique hash associated with this test.
  ``%(check_info)s``, Various information about this test; essentially the return value of the test's :func:`~reframe.core.pipeline.RegressionTest.info` function.
  ``%(check_job_completion_time)s``, Same as the ``(check_job_completion_time_unix)s`` but formatted according to ``datefmt``.
@@ -1184,6 +1185,9 @@ All logging handlers share the following set of common attributes:
  .. versionadded:: 4.3
     The ``%(check_#ALL)s`` special specifier is added.

+ .. versionadded:: 4.7
+    The ``%(check_fail_reason)s`` specifier is added.


.. py:attribute:: logging.handlers.format_perfvars
.. py:attribute:: logging.handlers_perflog.format_perfvars
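To see where the new specifier fits, here is a minimal sketch of a perflog handler entry in a ReFrame configuration file that consumes `%(check_fail_reason)s`. The particular format strings and prefix are illustrative, not part of this PR:

    # Hypothetical excerpt from a ReFrame settings file: a 'filelog'
    # perflog handler whose format string includes the new
    # %(check_fail_reason)s specifier added by this PR.
    'logging': [{
        'handlers_perflog': [{
            'type': 'filelog',
            'prefix': '%(check_system)s/%(check_partition)s',
            'level': 'info',
            'format': ('%(check_job_completion_time)s|%(check_info)s|'
                       '%(check_result)s|%(check_fail_reason)s|'
                       '%(check_perfvalues)s'),
            'format_perfvars': '%(check_perf_value)s|%(check_perf_unit)s|',
            'append': True
        }]
    }]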
reframe/core/logging.py (3 additions, 9 deletions)

@@ -19,7 +19,7 @@
  import reframe.utility.color as color
  import reframe.utility.jsonext as jsonext
  import reframe.utility.osext as osext
- from reframe.core.exceptions import ConfigError, LoggingError
+ from reframe.core.exceptions import ConfigError, LoggingError, what
  from reframe.core.warnings import suppress_deprecations
  from reframe.utility import is_trivially_callable
  from reframe.utility.profile import TimeProfiler
@@ -128,14 +128,6 @@ def ignore_brokenpipe(hdlr, l):
  logging.Handler.handleError = handleError(logging.Handler.handleError)


- def _expand_params(check):
-     cls = type(check)
-     return {
-         name: getattr(check, name) for name, param in cls.param_space.items()
-         if param.is_loggable()
-     }
-
-
  def _guess_delim(s):
      '''Guess the delimiter in the given logging format string'''
      delims = set()
@@ -863,6 +855,8 @@ def log_performance(self, level, task, msg=None, multiline=False):
          self.extra['check_partition'] = task.testcase.partition.name
          self.extra['check_environ'] = task.testcase.environ.name
          self.extra['check_result'] = 'pass' if task.succeeded else 'fail'
+         fail_reason = what(*task.exc_info) if not task.succeeded else None
+         self.extra['check_fail_reason'] = fail_reason
          if msg is None:
              msg = 'sent by ' + self.extra['osuser']

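`task.exc_info` is the standard `(type, value, traceback)` triple, and `what()` reduces it to a short human-readable reason string that ends up in `%(check_fail_reason)s`. A rough standalone sketch of that behavior, under the assumption that `what()` is essentially a type-plus-message formatter (the real helper lives in `reframe.core.exceptions`):

    import sys

    def what_sketch(exc_type, exc_value, tb):
        # Illustrative only: collapse an exc_info() triple into a short
        # failure-reason string, e.g. 'ValueError: no way'.
        return f'{exc_type.__name__}: {exc_value}'

    try:
        raise ValueError('no way')
    except ValueError:
        print(what_sketch(*sys.exc_info()))  # prints: ValueError: no way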
reframe/core/pipeline.py (23 additions, 23 deletions)

@@ -2235,14 +2235,11 @@ def check_performance(self):
              self.perf_variables[var] = sn.make_performance_function(expr,
                                                                      unit)

-         if self.is_dry_run():
-             return
-
          # Evaluate the performance function and retrieve the metrics
          with osext.change_dir(self._stagedir):
              for tag, expr in self.perf_variables.items():
                  try:
-                     value = expr.evaluate()
+                     value = expr.evaluate() if not self.is_dry_run() else None
                      unit = expr.unit
                  except Exception as e:
                      logging.getlogger().warning(
@@ -2282,27 +2279,30 @@ def check_performance(self):

                  self._perfvalues[key] = (value, *ref, unit)

-         # Check the performance variables against their references.
-         for key, values in self._perfvalues.items():
-             val, ref, low_thres, high_thres, *_ = values
+         if self.is_dry_run():
+             return

-             # Verify that val is a number
-             if not isinstance(val, numbers.Number):
-                 raise SanityError(
-                     f'the value extracted for performance variable '
-                     f'{key!r} is not a number: {val}'
-                 )
+         # Check the performance variables against their references.
+         for key, values in self._perfvalues.items():
+             val, ref, low_thres, high_thres, *_ = values

-             tag = key.split(':')[-1]
-             try:
-                 sn.evaluate(
-                     sn.assert_reference(
-                         val, ref, low_thres, high_thres,
-                         msg=('failed to meet reference: %s={0}, '
-                              'expected {1} (l={2}, u={3})' % tag))
-                 )
-             except SanityError as e:
-                 raise PerformanceError(e) from None
+             # Verify that val is a number
+             if not isinstance(val, numbers.Number):
+                 raise SanityError(
+                     f'the value extracted for performance variable '
+                     f'{key!r} is not a number: {val}'
+                 )
+
+             tag = key.split(':')[-1]
+             try:
+                 sn.evaluate(
+                     sn.assert_reference(
+                         val, ref, low_thres, high_thres,
+                         msg=('failed to meet reference: %s={0}, '
+                              'expected {1} (l={2}, u={3})' % tag))
+                 )
+             except SanityError as e:
+                 raise PerformanceError(e) from None

      def _copy_job_files(self, job, dst):
          if job is None:
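As a reminder of the reference-checking semantics used above: `sn.assert_reference` accepts a value if it lies within the fractional thresholds around the reference. A hedged usage sketch with made-up values:

    import reframe.utility.sanity as sn

    # Reference 100 with a -5%/+10% tolerance band, i.e. [95, 110].
    val, ref, low_thres, high_thres = 104.2, 100, -0.05, 0.10
    sn.evaluate(sn.assert_reference(val, ref, low_thres, high_thres))
    # 104.2 is inside [95, 110], so this evaluates without raising;
    # a value outside the band would raise a SanityError instead.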
reframe/frontend/cli.py (1 addition, 1 deletion)

@@ -1432,7 +1432,7 @@ def module_unuse(*paths):
          printer, options.duration or options.reruns
      )

-     if options.performance_report:
+     if options.performance_report and not options.dry_run:
          printer.info(runner.stats.performance_report())

      # Generate the report for this session
reframe/frontend/executors/__init__.py (28 additions, 3 deletions)

@@ -4,6 +4,7 @@
  # SPDX-License-Identifier: BSD-3-Clause

  import abc
+ import contextlib
  import copy
  import os
  import signal
@@ -328,6 +329,27 @@ def __exit__(this, exc_type, exc_value, traceback):
              self.fail()
              raise TaskExit from e

+     def _dry_run_call(self, fn, *args, **kwargs):
+         '''Call check's fn method in dry-run mode.'''
+
+         @contextlib.contextmanager
+         def temp_dry_run(check):
+             dry_run_save = check._rfm_dry_run
+             try:
+                 check._rfm_dry_run = True
+                 yield check
+             except ABORT_REASONS:
+                 raise
+             except BaseException:
+                 pass
+             finally:
+                 check._rfm_dry_run = dry_run_save
+
+         with runtime.temp_config(self.testcase.partition.fullname):
+             with temp_dry_run(self.check):
+                 return fn(*args, **kwargs)
+
      @logging.time_function
      def setup(self, *args, **kwargs):
          self.testcase.prepare()
@@ -372,12 +394,15 @@ def run_wait(self):

      @logging.time_function
      def sanity(self):
+         self._perflogger = logging.getperflogger(self.check)
          self._safe_call(self.check.sanity)

      @logging.time_function
-     def performance(self):
-         self._perflogger = logging.getperflogger(self.check)
-         self._safe_call(self.check.performance)
+     def performance(self, dry_run=False):
+         if dry_run:
+             self._dry_run_call(self.check.performance)
+         else:
+             self._safe_call(self.check.performance)

      @logging.time_function
      def finalize(self):
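`temp_dry_run` above is a save/flip/restore context manager: it forces the check into dry-run mode, deliberately swallows any non-abort exception so that the perflog entry is still emitted, and always restores the original flag. The same pattern in isolation, with generic names that are not part of this PR:

    import contextlib

    @contextlib.contextmanager
    def temporarily(obj, attr, value):
        # Set obj.<attr> to `value` for the duration of the block and
        # restore the saved value on exit, even if the block raises.
        saved = getattr(obj, attr)
        setattr(obj, attr, value)
        try:
            yield obj
        finally:
            setattr(obj, attr, saved)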
reframe/frontend/executors/policies.py (8 additions, 0 deletions)

@@ -199,6 +199,10 @@ def on_task_failure(self, task):
              self.printer.status('FAIL', msg, just='right')

          _print_perf(task)
+         if task.failed_stage == 'sanity':
+             # Dry-run the performance stage to trigger performance logging
+             task.performance(dry_run=True)
+
          timings = task.pipeline_timings(['setup',
                                           'compile_complete',
                                           'run_complete',
@@ -617,6 +621,10 @@ def on_task_failure(self, task):
              self.printer.status('FAIL', msg, just='right')

          _print_perf(task)
+         if task.failed_stage == 'sanity':
+             # Dry-run the performance stage to trigger performance logging
+             task.performance(dry_run=True)
+
          timings = task.pipeline_timings(['setup',
                                           'compile_complete',
                                           'run_complete',
unittests/test_cli.py (2 additions, 1 deletion)

@@ -423,7 +423,8 @@ def test_perflogdir_from_env(run_reframe, tmp_path, monkeypatch):
  def test_performance_report(run_reframe, run_action):
      returncode, stdout, _ = run_reframe(
          checkpath=['unittests/resources/checks/frontend_checks.py'],
-         more_options=['-n', 'PerformanceFailureCheck', '--performance-report'],
+         more_options=['-n', '^PerformanceFailureCheck',
+                       '--performance-report'],
          action=run_action
      )
      if run_action == 'run':
unittests/test_policies.py (25 additions, 0 deletions)

@@ -1400,3 +1400,28 @@ def test_perf_logging_param_test(make_runner, make_exec_ctx, perf_param_tests,
                             'default' / '_MyPerfParamTest.log')
      assert os.path.exists(logfile)
      assert _count_lines(logfile) == 3
+
+
+ def test_perf_logging_sanity_failure(make_runner, make_exec_ctx,
+                                      config_perflog, tmp_path):
+     class _X(_MyPerfTest):
+         @sanity_function
+         def validate(self):
+             return sn.assert_true(0, msg='no way')
+
+     make_exec_ctx(config_perflog(
+         fmt='%(check_result)s|%(check_fail_reason)s|%(check_perfvalues)s',
+         perffmt='%(check_perf_value)s|'
+     ))
+     logging.configure_logging(rt.runtime().site_config)
+     runner = make_runner()
+     testcases = executors.generate_testcases([_X()])
+     _assert_no_logging_error(runner.runall, testcases)
+
+     logfile = tmp_path / 'perflogs' / 'generic' / 'default' / '_X.log'
+     assert os.path.exists(logfile)
+     with open(logfile) as fp:
+         lines = fp.readlines()
+
+     assert len(lines) == 2
+     assert lines[1] == 'fail|sanity error: no way|None|None\n'
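With the format configured in this test, each perflog record is a '|'-separated line and, because the test fails sanity, the performance fields are logged as `None`. An illustrative way to read the asserted record back, assuming (as the test does) that the file's first line is the auto-generated header:

    record = 'fail|sanity error: no way|None|None\n'
    result, fail_reason, *perf_fields = record.rstrip('\n').split('|')
    assert result == 'fail'
    assert fail_reason == 'sanity error: no way'
    assert perf_fields == ['None', 'None']  # perf values absent on failure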