From 96ba6f260ccdbb09dc82ef8f9b4d7132539f69ea Mon Sep 17 00:00:00 2001
From: Markus Lavin <markus.lavin@ericsson.com>
Date: Wed, 6 Oct 2021 08:48:37 +0200
Subject: [PATCH] [NPM] Automatic 'opt' pipeline reducer script.

Script for automatic 'opt' pipeline reduction for when using the new
pass-manager (NPM). Based around the '-print-pipeline-passes' option.

The reduction algorithm consists of several phases (steps).

Step #0: Verify that input fails with the given pipeline and make note of the
error code.

Step #1: Split pipeline in two starting from front and move forward as long as
first pipeline exits normally and the second pipeline fails with the expected
error code. Move on to step #2 with the IR from the split point and the
pipeline from the second invocation.

Step #2: Remove passes from end of the pipeline as long as the pipeline fails
with the expected error code.

Step #3: Make several sweeps over the remaining pipeline trying to remove one
pass at a time. Repeat sweeps until unable to remove any more passes.

Usage example:
./utils/reduce_pipeline.py --opt-binary=./build-all-Debug/bin/opt --input=input.ll --output=output.ll --passes=PIPELINE [EXTRA-OPT-ARGS ...]

Differential Revision: https://reviews.llvm.org/D110908
---
 llvm/utils/pipeline.py                      | 172 ++++++++++++++++
 llvm/utils/reduce_pipeline.py               | 212 ++++++++++++++++++++
 llvm/utils/reduce_pipeline_test/fake_opt.py |  73 +++++++
 llvm/utils/reduce_pipeline_test/test.py     |  92 +++++++++
 4 files changed, 549 insertions(+)
 create mode 100644 llvm/utils/pipeline.py
 create mode 100755 llvm/utils/reduce_pipeline.py
 create mode 100755 llvm/utils/reduce_pipeline_test/fake_opt.py
 create mode 100755 llvm/utils/reduce_pipeline_test/test.py

diff --git a/llvm/utils/pipeline.py b/llvm/utils/pipeline.py
new file mode 100644
index 000000000000..7e49f711d3e7
--- /dev/null
+++ b/llvm/utils/pipeline.py
@@ -0,0 +1,172 @@
+# Automatically formatted with yapf (https://github.com/google/yapf)
+"""Utility functions for creating and manipulating LLVM 'opt' NPM pipeline objects."""
+
+
+def fromStr(pipeStr):
+    """Create pipeline object from string representation."""
+    stack = []
+    curr = []
+    tok = ''
+    kind = ''
+    for c in pipeStr:
+        if c == ',':
+            if tok != '':
+                curr.append([None, tok])
+            tok = ''
+        elif c == '(':
+            stack.append([kind, curr])
+            kind = tok
+            curr = []
+            tok = ''
+        elif c == ')':
+            if tok != '':
+                curr.append([None, tok])
+            tok = ''
+            oldKind = kind
+            oldCurr = curr
+            [kind, curr] = stack.pop()
+            curr.append([oldKind, oldCurr])
+        else:
+            tok += c
+    if tok != '':
+        curr.append([None, tok])
+    return curr
+
+
+def toStr(pipeObj):
+    """Create string representation of pipeline object."""
+    res = ''
+    lastIdx = len(pipeObj) - 1
+    for i, c in enumerate(pipeObj):
+        if c[0]:
+            res += c[0] + '('
+            res += toStr(c[1])
+            res += ')'
+        else:
+            res += c[1]
+        if i != lastIdx:
+            res += ','
+    return res
+
+
+def count(pipeObj):
+    """Count number of passes (pass-managers excluded) in pipeline object."""
+    cnt = 0
+    for c in pipeObj:
+        if c[0]:
+            cnt += count(c[1])
+        else:
+            cnt += 1
+    return cnt
+
+
+def split(pipeObj, splitIndex):
+    """Create two new pipeline objects by splitting pipeObj in two directly after pass with index splitIndex."""
+    def splitInt(src, splitIndex, dstA, dstB, idx):
+        for s in src:
+            if s[0]:
+                dstA2 = []
+                dstB2 = []
+                idx = splitInt(s[1], splitIndex, dstA2, dstB2, idx)
+                dstA.append([s[0], dstA2])
+                dstB.append([s[0], dstB2])
+            else:
+                if idx <= splitIndex:
+                    dstA.append([None, s[1]])
+                else:
+                    dstB.append([None, s[1]])
+                idx += 1
+        return idx
+
+    listA = []
+    listB = []
+    splitInt(pipeObj, splitIndex, listA, listB, 0)
+    return [listA, listB]
+
+
+def remove(pipeObj, removeIndex):
+    """Create new pipeline object by removing pass with index removeIndex from pipeObj."""
+    def removeInt(src, removeIndex, dst, idx):
+        for s in src:
+            if s[0]:
+                dst2 = []
+                idx = removeInt(s[1], removeIndex, dst2, idx)
+                dst.append([s[0], dst2])
+            else:
+                if idx != removeIndex:
+                    dst.append([None, s[1]])
+                idx += 1
+        return idx
+
+    dst = []
+    removeInt(pipeObj, removeIndex, dst, 0)
+    return dst
+
+
+def copy(srcPipeObj):
+    """Create copy of pipeline object srcPipeObj."""
+    def copyInt(dst, src):
+        for s in src:
+            if s[0]:
+                dst2 = []
+                copyInt(dst2, s[1])
+                dst.append([s[0], dst2])
+            else:
+                dst.append([None, s[1]])
+
+    dstPipeObj = []
+    copyInt(dstPipeObj, srcPipeObj)
+    return dstPipeObj
+
+
+def prune(srcPipeObj):
+    """Create new pipeline object by removing empty pass-managers (those with count = 0) from srcPipeObj."""
+    def pruneInt(dst, src):
+        for s in src:
+            if s[0]:
+                if count(s[1]):
+                    dst2 = []
+                    pruneInt(dst2, s[1])
+                    dst.append([s[0], dst2])
+            else:
+                dst.append([None, s[1]])
+
+    dstPipeObj = []
+    pruneInt(dstPipeObj, srcPipeObj)
+    return dstPipeObj
+
+
+if __name__ == "__main__":
+    import unittest
+
+    class Test(unittest.TestCase):
+        def test_0(self):
+            pipeStr = 'a,b,A(c,B(d,e),f),g'
+            pipeObj = fromStr(pipeStr)
+
+            self.assertEqual(7, count(pipeObj))
+
+            self.assertEqual(pipeObj, pipeObj)
+            self.assertEqual(pipeObj, prune(pipeObj))
+            self.assertEqual(pipeObj, copy(pipeObj))
+
+            self.assertEqual(pipeStr, toStr(pipeObj))
+            self.assertEqual(pipeStr, toStr(prune(pipeObj)))
+            self.assertEqual(pipeStr, toStr(copy(pipeObj)))
+
+            [pipeObjA, pipeObjB] = split(pipeObj, 3)
+            self.assertEqual('a,b,A(c,B(d))', toStr(pipeObjA))
+            self.assertEqual('A(B(e),f),g', toStr(pipeObjB))
+
+            self.assertEqual('b,A(c,B(d,e),f),g', toStr(remove(pipeObj, 0)))
+            self.assertEqual('a,b,A(c,B(d,e),f)', toStr(remove(pipeObj, 6)))
+
+            pipeObjC = remove(pipeObj, 4)
+            self.assertEqual('a,b,A(c,B(d),f),g', toStr(pipeObjC))
+            pipeObjC = remove(pipeObjC, 3)
+            self.assertEqual('a,b,A(c,B(),f),g', toStr(pipeObjC))
+            pipeObjC = prune(pipeObjC)
+            self.assertEqual('a,b,A(c,f),g', toStr(pipeObjC))
+
+    unittest.main()
+    exit(0)
diff --git a/llvm/utils/reduce_pipeline.py b/llvm/utils/reduce_pipeline.py
new file mode 100755
index 000000000000..7779bb805e13
--- /dev/null
+++ b/llvm/utils/reduce_pipeline.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+
+# Automatically formatted with yapf (https://github.com/google/yapf)
+
+# Script for automatic 'opt' pipeline reduction for when using the new
+# pass-manager (NPM). Based around the '-print-pipeline-passes' option.
+#
+# The reduction algorithm consists of several phases (steps).
+#
+# Step #0: Verify that input fails with the given pipeline and make note of the
+# error code.
+#
+# Step #1: Split pipeline in two starting from front and move forward as long as
+# first pipeline exits normally and the second pipeline fails with the expected
+# error code. Move on to step #2 with the IR from the split point and the
+# pipeline from the second invocation.
+#
+# Step #2: Remove passes from end of the pipeline as long as the pipeline fails
+# with the expected error code.
+#
+# Step #3: Make several sweeps over the remaining pipeline trying to remove one
+# pass at a time. Repeat sweeps until unable to remove any more passes.
+#
+# Usage example:
+# reduce_pipeline.py --opt-binary=./build-all-Debug/bin/opt --input=input.ll --output=output.ll --passes=PIPELINE [EXTRA-OPT-ARGS ...]
+
+import argparse
+import pipeline
+import shutil
+import subprocess
+import tempfile
+
+parser = argparse.ArgumentParser(
+    description=
+    'Automatic opt pipeline reducer. Unrecognized arguments are forwarded to opt.'
+)
+parser.add_argument('--opt-binary',
+                    action='store',
+                    dest='opt_binary',
+                    default='opt')
+parser.add_argument('--passes', action='store', dest='passes', required=True)
+parser.add_argument('--input', action='store', dest='input', required=True)
+parser.add_argument('--output', action='store', dest='output')
+parser.add_argument('--dont-expand-passes',
+                    action='store_true',
+                    dest='dont_expand_passes',
+                    help='Do not expand pipeline before starting reduction.')
+parser.add_argument(
+    '--dont-remove-empty-pm',
+    action='store_true',
+    dest='dont_remove_empty_pm',
+    help='Do not remove empty pass-managers from the pipeline during reduction.'
+)
+[args, extra_opt_args] = parser.parse_known_args()
+
+print('The following extra args will be passed to opt: {}'.format(
+    extra_opt_args))
+
+lst = pipeline.fromStr(args.passes)
+passes = '-passes={}'.format(pipeline.toStr(lst))
+ll_input = args.input
+
+# Step #-1
+# Launch 'opt' once with '-print-pipeline-passes' to expand pipeline before
+# starting reduction. Allows specifying a default pipelines (e.g.
+# '-passes=default<O3>').
+if not args.dont_expand_passes:
+    run_args = [
+        args.opt_binary, '-disable-symbolication', '-disable-output',
+        '-print-pipeline-passes', passes, ll_input
+    ]
+    run_args.extend(extra_opt_args)
+    opt = subprocess.run(run_args,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    if opt.returncode != 0:
+        print('Failed to expand passes. Aborting.')
+        print(run_args)
+        print('exitcode: {}'.format(opt.returncode))
+        print(opt.stderr.decode())
+        exit(1)
+    stdout = opt.stdout.decode()
+    stdout = stdout[:stdout.rfind('\n')]
+    print('Expanded pass sequence: {}'.format(stdout))
+    passes = '-passes={}'.format(stdout)
+
+# Step #0
+# Confirm that the given input, passes and options result in failure.
+print('---Starting step #0---')
+run_args = [
+    args.opt_binary, '-disable-symbolication', '-disable-output', passes,
+    ll_input
+]
+run_args.extend(extra_opt_args)
+opt = subprocess.run(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+if opt.returncode >= 0:
+    print('Input does not result in failure as expected. Aborting.')
+    print(run_args)
+    print('exitcode: {}'.format(opt.returncode))
+    print(opt.stderr.decode())
+    exit(1)
+
+expected_error_returncode = opt.returncode
+print('-passes="{}"'.format(pipeline.toStr(lst)))
+
+# Step #1
+# Try to narrow down the failing pass sequence by splitting the pipeline in two
+# opt invocations (A and B) starting with invocation A only running the first
+# pipeline pass and invocation B the remaining. Keep moving the split point
+# forward as long as invocation A exits normally and invocation B fails with
+# the expected error. This will accomplish two things first the input IR will be
+# further reduced and second, with that IR, the reduced pipeline for invocation
+# B will be sufficient to reproduce.
+print('---Starting step #1---')
+prevLstB = None
+prevIntermediate = None
+tmpd = tempfile.TemporaryDirectory()
+
+for idx in range(pipeline.count(lst)):
+    [lstA, lstB] = pipeline.split(lst, idx)
+    if not args.dont_remove_empty_pm:
+        lstA = pipeline.prune(lstA)
+        lstB = pipeline.prune(lstB)
+    passesA = '-passes=' + pipeline.toStr(lstA)
+    passesB = '-passes=' + pipeline.toStr(lstB)
+
+    intermediate = 'intermediate-0.ll' if idx % 2 else 'intermediate-1.ll'
+    intermediate = tmpd.name + '/' + intermediate
+    run_args = [
+        args.opt_binary, '-disable-symbolication', '-S', '-o', intermediate,
+        passesA, ll_input
+    ]
+    run_args.extend(extra_opt_args)
+    optA = subprocess.run(run_args,
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
+    run_args = [
+        args.opt_binary, '-disable-symbolication', '-disable-output', passesB,
+        intermediate
+    ]
+    run_args.extend(extra_opt_args)
+    optB = subprocess.run(run_args,
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
+    if not (optA.returncode == 0
+            and optB.returncode == expected_error_returncode):
+        break
+    prevLstB = lstB
+    prevIntermediate = intermediate
+if prevLstB:
+    lst = prevLstB
+    ll_input = prevIntermediate
+print('-passes="{}"'.format(pipeline.toStr(lst)))
+
+# Step #2
+# Try removing passes from the end of the remaining pipeline while still
+# reproducing the error.
+print('---Starting step #2---')
+prevLstA = None
+for idx in reversed(range(pipeline.count(lst))):
+    [lstA, lstB] = pipeline.split(lst, idx)
+    if not args.dont_remove_empty_pm:
+        lstA = pipeline.prune(lstA)
+    passesA = '-passes=' + pipeline.toStr(lstA)
+    run_args = [
+        args.opt_binary, '-disable-symbolication', '-disable-output', passesA,
+        ll_input
+    ]
+    run_args.extend(extra_opt_args)
+    optA = subprocess.run(run_args,
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
+    if optA.returncode != expected_error_returncode:
+        break
+    prevLstA = lstA
+if prevLstA:
+    lst = prevLstA
+print('-passes="{}"'.format(pipeline.toStr(lst)))
+
+# Step #3
+# Now that we have a pipeline that is reduced both front and back we do
+# exhaustive sweeps over the remainder trying to remove one pass at a time.
+# Repeat as long as reduction is possible.
+print('---Starting step #3---')
+while True:
+    keepGoing = False
+    for idx in range(pipeline.count(lst)):
+        candLst = pipeline.remove(lst, idx)
+        if not args.dont_remove_empty_pm:
+            candLst = pipeline.prune(candLst)
+        passes = '-passes=' + pipeline.toStr(candLst)
+        run_args = [
+            args.opt_binary, '-disable-symbolication', '-disable-output',
+            passes, ll_input
+        ]
+        run_args.extend(extra_opt_args)
+        opt = subprocess.run(run_args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        if opt.returncode == expected_error_returncode:
+            lst = candLst
+            keepGoing = True
+    if not keepGoing:
+        break
+print('-passes="{}"'.format(pipeline.toStr(lst)))
+
+print('---FINISHED---')
+if args.output:
+    shutil.copy(ll_input, args.output)
+    print('Wrote output to \'{}\'.'.format(args.output))
+print('-passes="{}"'.format(pipeline.toStr(lst)))
+exit(0)
diff --git a/llvm/utils/reduce_pipeline_test/fake_opt.py b/llvm/utils/reduce_pipeline_test/fake_opt.py
new file mode 100755
index 000000000000..225b6ee81432
--- /dev/null
+++ b/llvm/utils/reduce_pipeline_test/fake_opt.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# Automatically formatted with yapf (https://github.com/google/yapf)
+
+# Fake 'opt' program that can be made to crash on request. For testing
+# the 'reduce_pipeline.py' automatic 'opt' NPM pipeline reducer.
+
+import argparse
+import os
+import shutil
+import signal
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-passes', action='store', dest='passes', required=True)
+parser.add_argument('-print-pipeline-passes',
+                    dest='print_pipeline_passes',
+                    action='store_true')
+parser.add_argument('-crash-seq',
+                    action='store',
+                    dest='crash_seq',
+                    required=True)
+parser.add_argument('-o', action='store', dest='output')
+parser.add_argument('input')
+[args, unknown_args] = parser.parse_known_args()
+
+# Echo pipeline if '-print-pipeline-passes'.
+if args.print_pipeline_passes:
+    print(args.passes)
+    exit(0)
+
+# Parse '-crash-seq'.
+crash_seq = []
+tok = ''
+for c in args.crash_seq:
+    if c == ',':
+        if tok != '':
+            crash_seq.append(tok)
+        tok = ''
+    else:
+        tok += c
+if tok != '':
+    crash_seq.append(tok)
+print(crash_seq)
+
+# Parse '-passes' and see if we need to crash.
+tok = ''
+for c in args.passes:
+    if c == ',':
+        if len(crash_seq) > 0 and crash_seq[0] == tok:
+            crash_seq.pop(0)
+        tok = ''
+    elif c == '(':
+        tok = ''
+    elif c == ')':
+        if len(crash_seq) > 0 and crash_seq[0] == tok:
+            crash_seq.pop(0)
+        tok = ''
+    else:
+        tok += c
+if len(crash_seq) > 0 and crash_seq[0] == tok:
+    crash_seq.pop(0)
+
+# Copy input to output.
+if args.output:
+    shutil.copy(args.input, args.output)
+
+# Crash if all 'crash_seq' passes occured in right order.
+if len(crash_seq) == 0:
+    print('crash')
+    os.kill(os.getpid(), signal.SIGKILL)
+else:
+    print('no crash')
+    exit(0)
diff --git a/llvm/utils/reduce_pipeline_test/test.py b/llvm/utils/reduce_pipeline_test/test.py
new file mode 100755
index 000000000000..03616fd17bdb
--- /dev/null
+++ b/llvm/utils/reduce_pipeline_test/test.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+# Automatically formatted with yapf (https://github.com/google/yapf)
+
+import subprocess
+import unittest
+
+
+def getFinalPasses(run):
+    stdout = run.stdout.decode()
+    stdout = stdout[:stdout.rfind('\n')]
+    stdout = stdout[stdout.rfind('\n') + 1:]
+    return stdout
+
+
+class Test(unittest.TestCase):
+    def test_0(self):
+        """Test all passes are removed except those required to crash. Verify
+           that PM structure is intact."""
+        run_args = [
+            './utils/reduce_pipeline.py',
+            '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py',
+            '--input=/dev/null', '--passes=a,b,c,A(d,B(e,f),g),h,i',
+            '-crash-seq=b,d,f'
+        ]
+        run = subprocess.run(run_args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        self.assertEqual(run.returncode, 0)
+        self.assertEqual(getFinalPasses(run), '-passes="b,A(d,B(f))"')
+
+    def test_1(self):
+        """Test all passes are removed except those required to crash. The
+           required passes in this case are the first and last in that order
+           (a bit of a corner-case for the reduction algorithm)."""
+        run_args = [
+            './utils/reduce_pipeline.py',
+            '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py',
+            '--input=/dev/null', '--passes=a,b,c,A(d,B(e,f),g),h,i',
+            '-crash-seq=a,i'
+        ]
+        run = subprocess.run(run_args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        self.assertEqual(run.returncode, 0)
+        self.assertEqual(getFinalPasses(run), '-passes="a,i"')
+
+    def test_2(self):
+        """Test the '--dont-expand-passes' option."""
+        run_args = [
+            './utils/reduce_pipeline.py',
+            '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py',
+            '--input=/dev/null', '--passes=a,b,c,A(d,B(e,f),g),h,i',
+            '-crash-seq=b,d,f', '--dont-expand-passes'
+        ]
+        run = subprocess.run(run_args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        self.assertEqual(run.returncode, 0)
+        self.assertEqual(getFinalPasses(run), '-passes="b,A(d,B(f))"')
+
+    def test_3(self):
+        """Test that empty pass-managers get removed by default."""
+        run_args = [
+            './utils/reduce_pipeline.py',
+            '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py',
+            '--input=/dev/null', '--passes=a,b,c,A(d,B(e,f),g),h,i',
+            '-crash-seq=b,d,h'
+        ]
+        run = subprocess.run(run_args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        self.assertEqual(run.returncode, 0)
+        self.assertEqual(getFinalPasses(run), '-passes="b,A(d),h"')
+
+    def test_4(self):
+        """Test the '--dont-remove-empty-pm' option."""
+        run_args = [
+            './utils/reduce_pipeline.py',
+            '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py',
+            '--input=/dev/null', '--passes=a,b,c,A(d,B(e,f),g),h,i',
+            '-crash-seq=b,d,h', '--dont-remove-empty-pm'
+        ]
+        run = subprocess.run(run_args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        self.assertEqual(run.returncode, 0)
+        self.assertEqual(getFinalPasses(run), '-passes="b,A(d,B()),h"')
+
+
+unittest.main()
+exit(0)