StackStorm · blag · Mar 23, 2021 · Mar 20, 2021 · Mar 21, 2021 · Mar 21, 2021
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -44,28 +44,45 @@ jobs:
           # - name: 'Micro Benchmarks'
           #   task: 'micro-benchmarks'
           #   python-version: '3.6'
-          # Integration tests are not working yet, still done in Travis
-          # - name: 'Integration Tests'
-          #   task: 'ci-integration'
+          - name: 'Integration Tests'
+            task: 'ci-integration'
+            python-version: '3.6'
     services:
       mongo:
         image: mongo:4.0
         ports:
           - 27017:27017
-      # Can't use RabbitMQ here for Integrations because we rely on custom config
-      # and SSL certs that are in the repo. In GHA, these services are started first
-      # before the code is checked out, so this is a non-starter, we need to do it
-      # manually below (TODO)
+
+      # In GHA, these services are started first before the code is checked out.
+      # We use bitnami images to facilitate reconfiguring RabbitMQ during ci-integration tests.
+      # We rely on custom config and SSL certs that are in the repo.
+      # Many images require config in env vars (which we can't change during the test job)
+      # or they require config in entrypoint args (which we can't override for GHA services)
+      # bitnami images instead provide ways to load config files from mounted volumes.
       rabbitmq:
-        # use the -management version so it has the management tools installed
-        image: rabbitmq:3.8-management
+        image: bitnami/rabbitmq:3.8
+        volumes:
+          - /home/runner/rabbitmq_conf:/bitnami/conf  # RABBITMQ_MOUNTED_CONF_DIR
+        env:
+          # tell bitnami/rabbitmq to enable this by default
+          RABBITMQ_PLUGINS: rabbitmq_management
+          RABBITMQ_USERNAME: guest
+          RABBITMQ_PASSWORD: guest
+
+        # These are strictly docker options, not entrypoint args (GHA restriction)
+        options: >-
+          --name rabbitmq
+
         ports:
-          # SSL port
-          - 5671:5671
-          # standard port
-          - 5672:5672
-          # management port
-          - 15672:15672
+          # These 6 ports are exposed by bitnami/rabbitmq (see https://www.rabbitmq.com/networking.html#ports)
+          # host_port:container_port/protocol
+          - 5671:5671/tcp   # AMQP SSL port
+          - 5672:5672/tcp   # AMQP standard port
+          - 15672:15672/tcp # Management: HTTP, CLI
+          #- 15671:15671/tcp # Management: SSL port
+          #- 25672:25672/tcp # inter-node or CLI
+          #- 4369:4369/tcp   # epmd
+
     env:
       TASK: '${{ matrix.task }}'
 
@@ -114,14 +131,34 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: '${{ matrix.python-version }}'
+      - name: Get date components for use in cache-keys
+        id: date
+        run: |
+          echo "year=$(/bin/date -u "+%Y")" >> "$GITHUB_OUTPUT"    # step outputs via env file;
+          echo "month=$(/bin/date -u "+%m")" >> "$GITHUB_OUTPUT"   # the ::set-output command
+          echo "week=$(/bin/date -u "+%U")" >> "$GITHUB_OUTPUT"    # is deprecated
       - uses: actions/cache@v2
         with:
           path: |
             ~/.cache/pip
             virtualenv
-          key: ${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements.txt', 'test-requirements.txt') }}
+            ~/virtualenv
+          # TODO: maybe make the virtualenv a partial cache to exclude st2*?
+          # !virtualenv/lib/python*/site-packages/st2*
+          # !virtualenv/bin/st2*
+          key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ hashFiles('requirements.txt', 'test-requirements.txt') }}
+          restore-keys: |  # same prefix as `key`, minus the requirements hash
+            ${{ runner.os }}-python-${{ matrix.python-version }}-
+      - uses: actions/cache@v2
+        with:
+          path: |
+            /var/cache/apt/archives/*.deb
+            /var/cache/apt/archives/partial/*.deb
+            /var/cache/apt/*.bin
+          key: ${{ runner.os }}-apt-${{ steps.date.outputs.year }}-${{ steps.date.outputs.week }}
           restore-keys: |
-            ${{ runner.os }}-${{ matrix.python }}-
+            ${{ runner.os }}-apt-${{ steps.date.outputs.year }}-
+            ${{ runner.os }}-apt-
       - name: Install apt depedencies
         run: |
           # install dev dependencies for Python YAML and LDAP packages
@@ -130,9 +167,22 @@ jobs:
           sudo apt-get -f -y install libldap2-dev libsasl2-dev libssl-dev libyaml-dev ldap-utils
       - name: Install virtualenv
         run: |
+          set -x
           # Note: Use the verison of virtualenv pinned in fixed-requirements.txt so we
           #       only have to update it one place when we change the version
-          pip install --upgrade --force-reinstall $(grep "^virtualenv" fixed-requirements.txt)
+          # Note: Use --user to avoid polluting system site-packages (which breaks one of our tests)
+          # TODO: simplify this once fixed in contrib/runners/python_runner/tests/integration/test_pythonrunner_behavior.py
+          if [[ ! -f ~/virtualenv/bin/virtualenv ]]; then  # use the cached version whenever possible
+            pip install --user --upgrade --force-reinstall $(grep "^virtualenv" fixed-requirements.txt)
+            virtualenv --no-download ~/virtualenv
+            ~/virtualenv/bin/pip install --upgrade --force-reinstall $(grep "^virtualenv" fixed-requirements.txt)
+            # drop the --user install virtualenv to prevent polluting tests
+            pip freeze --user | xargs pip uninstall -y
+          fi
+          mkdir -p ~/.local/bin
+          ln -s ~/virtualenv/bin/virtualenv ~/.local/bin/virtualenv
+          which virtualenv
+          virtualenv --version
       - name: Install requirements
         run: |
           ./scripts/travis/install-requirements.sh
@@ -149,34 +199,43 @@ jobs:
         run: |
           echo "$ST2_CI_REPO_PATH"
           sudo ST2_CI_REPO_PATH="${ST2_CI_REPO_PATH}" scripts/travis/permissions-workaround.sh
-      - name: Setup RabbitMQ (NOT WORKING YET)
-        if: "${{ env.TASK == 'ci-integration' }}"
+      - name: Reconfigure RabbitMQ
+        if: "${{ env.TASK == 'ci-unit' || env.TASK == 'ci-integration' }}"
+        # The bitnami image merges a mounted custom.conf into rabbitmq.conf (see the
+        # bitnami/rabbitmq readme). The container is already running before checkout, so we
+        # copy our config into the mounted dir, append it by hand, and restart RabbitMQ.
+        timeout-minutes: 2  # may die if rabbitmq fails to start
         run: |
+          set -x
           # Use custom RabbitMQ config which enables SSL / TLS listener on port 5671 with test certs
-          # Travis runs as the 'travis' user, GitHub actions run as the 'runner' user,
-          # And the cert filepaths are slightly different between the two.
-          # Example:
-          #   Travis-CI:      /home/travis/build/StackStorm/st2/st2tests/st2tests/fixtures/ssl_certs/ca/ca_certificate_bundle.pem
-          #   GitHub Actions: /home/runner/work/st2/st2/st2tests/st2tests/fixtures/ssl_certs/ca/ca_certificate_bundle.pem
-          sed -i 's|/home/travis/build/StackStorm|/home/runner/work/st2|g' scripts/travis/rabbitmq.config
-          # Now that we've manged the config file, install it
-          sudo cp scripts/travis/rabbitmq.config /etc/rabbitmq/rabbitmq.config
-          # Install rabbitmq_management RabbitMQ plugin
-          sudo service rabbitmq-server restart
-          sleep 5
-          sudo rabbitmq-plugins enable rabbitmq_management
+          sudo cp scripts/github/rabbitmq.conf /home/runner/rabbitmq_conf/custom.conf
+          # The code is checked out after the container is already up, so the SSL certs can't
+          # be mounted from the repo. Instead, copy them into the dir mounted at /bitnami/conf
+          sudo cp -r st2tests/st2tests/fixtures/ssl_certs /home/runner/rabbitmq_conf/
+          # refresh rabbitmq config - based on ENTRYPOINT logic
+          docker exec rabbitmq bash -c 'cat /bitnami/conf/custom.conf >> /opt/bitnami/rabbitmq/etc/rabbitmq/rabbitmq.conf'
+          # sleep to prevent interleaved output in GHA logs
+          docker exec rabbitmq cat /opt/bitnami/rabbitmq/etc/rabbitmq/rabbitmq.conf && sleep 0.1
+          echo
+          echo restarting rabbitmq container
+          docker restart rabbitmq
+          # wait for rabbitmq container to restart
+          until [ "$(docker inspect -f {{.State.Running}} rabbitmq)" == "true" ]; do sleep 0.1; done
+          echo enabled RabbitMQ plugins:
+          # print plugins list to: (1) ease debugging, (2) pause till rabbitmq is really running
+          docker exec rabbitmq rabbitmq-plugins list -e
+          echo
           sudo wget http://guest:guest@localhost:15672/cli/rabbitmqadmin -O /usr/local/bin/rabbitmqadmin
           sudo chmod +x /usr/local/bin/rabbitmqadmin
-          sudo service rabbitmq-server restart
-          # chmod to make glob work (*.log to avoid log dir)
-          sudo chmod a+rx /var/log/rabbitmq
-          sudo tail -n 30 /var/log/rabbitmq/*.log
+          # print logs from stdout (RABBITMQ_LOGS=-)
+          docker logs --tail=20 rabbitmq
       - name: Print versions
         run: |
           # Print various binary versions
           git --version
           pip --version
           pip list
+          virtualenv --version
           # Print out various environment variables info
           make play
       - name: make

diff --git a/.travis.yml b/.travis.yml
@@ -85,6 +85,12 @@ cache:
     #- .tox/
 
 install:
+  # This triggers the same behavior on travis as we found on github because it installs six in the system-site-packages
+  #- /opt/python/3.6.7/bin/pip install --upgrade --force-reinstall $(grep "^virtualenv" fixed-requirements.txt)
+  #- mkdir -p ~/.local/bin
+  #- ln -s /opt/python/3.6.7/bin/virtualenv ~/.local/bin/virtualenv
+  - which virtualenv
+  - virtualenv --version
   - ./scripts/travis/install-requirements.sh
   # prep a travis-specific dev conf file that uses travis instead of stanley
   - cp conf/st2.dev.conf "${ST2_CONF}" ; sed -i -e "s/stanley/${ST2_CI_USER}/" "${ST2_CONF}"

diff --git a/Makefile b/Makefile
@@ -674,7 +674,7 @@ check-dependency-conflicts:
 	@echo
 	# Verify there are no conflicting dependencies
 	cat st2*/requirements.txt contrib/runners/*/requirements.txt | sort -u > req.txt && \
-	$(VIRTUALENV_DIR)/bin/pip-compile req.txt; \
+	$(VIRTUALENV_DIR)/bin/pip-compile req.txt || { rm -f req.txt; exit 1; }; \
 	if [[ -e req.txt ]]; then rm req.txt; fi
 
 .PHONY: virtualenv

diff --git a/contrib/runners/python_runner/python_runner/python_action_wrapper.py b/contrib/runners/python_runner/python_runner/python_action_wrapper.py
@@ -359,6 +359,12 @@ def _get_action_instance(self):
 
         stdin_data = sys.stdin.readline().strip()
 
+        if not stdin_data:
+            # This could indicate that the parent process (e.g. the process which runs the
+            # tests) has incorrectly opened stdin, which is then inherited by the process it
+            # spawns and causes issues.
+            raise ValueError("Received no valid parameters data from sys.stdin")
+
         try:
             stdin_parameters = orjson.loads(stdin_data)
             stdin_parameters = stdin_parameters.get("parameters", {})

diff --git a/contrib/runners/python_runner/tests/integration/test_python_action_process_wrapper.py b/contrib/runners/python_runner/tests/integration/test_python_action_process_wrapper.py
@@ -142,14 +142,21 @@ def test_stdin_params_timeout_no_stdin_data_provided(self):
             "python %s --pack=dummy --file-path=%s --config='%s' "
             "--stdin-parameters" % (WRAPPER_SCRIPT_PATH, file_path, config)
         )
-        exit_code, stdout, stderr = run_command(command_string, shell=True)
+        exit_code, stdout, stderr = run_command(
+            command_string, shell=True, close_fds=True
+        )
 
-        expected_msg = (
+        # Depending on how tests are spawned, sys.stdin may be opened and this will cause issues
+        # with this test, so we simply check for two different errors which are considered
+        # acceptable.
+        expected_msg_1 = (
             "ValueError: No input received and timed out while waiting for parameters "
             "from stdin"
         )
+        expected_msg_2 = "ValueError: Received no valid parameters data from sys.stdin"
+
         self.assertEqual(exit_code, 1)
-        self.assertIn(expected_msg, stderr)
+        self.assertTrue(expected_msg_1 in stderr or expected_msg_2 in stderr)
 
     def test_stdin_params_invalid_format_friendly_error(self):
         config = {}

diff --git a/contrib/runners/python_runner/tests/integration/test_pythonrunner_behavior.py b/contrib/runners/python_runner/tests/integration/test_pythonrunner_behavior.py
@@ -40,6 +40,12 @@
 
 
 class PythonRunnerBehaviorTestCase(CleanFilesTestCase, CleanDbTestCase):
+
+    # If you need these logs, then you probably also want to uncomment
+    # extra debug log messages in st2common/st2common/util/virtualenvs.py
+    # and pass --logging-level=DEBUG to nosetests
+    # DISPLAY_LOG_MESSAGES = True
+
     def setUp(self):
         super(PythonRunnerBehaviorTestCase, self).setUp()
         config.parse_args()
@@ -74,6 +80,15 @@ def test_priority_of_loading_library_after_setup_pack_virtualenv(self):
         (_, output, _) = self._run_action(
             pack_name, "get_library_path.py", {"module": "six"}
         )
+        # FIXME: This test fails if system site-packages has six because
+        # it won't get installed in the virtualenv (w/ --system-site-packages)
+        # system site-packages is never from a virtualenv.
+        # Travis has python installed in /opt/python/3.6.7
+        # with a no-system-site-packages virtualenv at /home/travis/virtualenv/python3.6.7
+        # GitHub Actions python is in /opt/hostedtoolcache/Python/3.6.13/x64/
+        # But there isn't a virtualenv, so when we pip installed `virtualenv`,
+        # (which depends on, and therefore installs `six`)
+        # we installed it in system-site-packages not an intermediate virtualenv
         self.assertEqual(output["result"].find(self.virtualenvs_path), 0)
 
         # Conversely, this expects that 'mock' module file-path is not under sandbox library,

diff --git a/scripts/github/rabbitmq.conf b/scripts/github/rabbitmq.conf
@@ -0,0 +1,11 @@
+# bitnami/rabbitmq configuration file (gets merged with rabbitmq.conf)
+listeners.ssl.default            = 5671
+# /bitnami/conf is a directory mounted into the bitnami/rabbitmq container
+ssl_options.cacertfile           = /bitnami/conf/ssl_certs/ca/ca_certificate_bundle.pem
+ssl_options.certfile             = /bitnami/conf/ssl_certs/server/server_certificate.pem
+ssl_options.keyfile              = /bitnami/conf/ssl_certs/server/private_key.pem
+ssl_options.verify               = verify_peer
+ssl_options.fail_if_no_peer_cert = false
+
+# this is "insecure" but it doesn't matter for CI, and it simplifies integration test machinery
+loopback_users = none
diff --git a/scripts/travis/prepare-integration.sh b/scripts/travis/prepare-integration.sh
@@ -16,6 +16,9 @@ st2 --version
 # Clean up old screen log files
 rm -f logs/screen-*.log
 
+# ::group::/::endgroup:: is helpful github actions syntax to fold this section.
+echo ::group::launchdev.sh start -x
+
 # start dev environment in screens
 ./tools/launchdev.sh start -x
 
@@ -28,6 +31,9 @@ echo " === START: Catting screen process log files. ==="
 cat logs/screen-*.log
 echo " === END: Catting screen process log files. ==="
 
+# github actions: fold for launchdev.sh start -x
+echo ::endgroup::
+
 # Setup the virtualenv for the examples pack which is required for orquesta integration tests.
 st2 run packs.setup_virtualenv packs=examples
 
@@ -39,3 +45,6 @@ chmod 777 logs/*
 # root needs to access write some lock files when creating virtualenvs
 # o=other; X=only set execute bit if user execute bit is set (eg on dirs)
 chmod -R o+rwX ./virtualenv/
+# newer virtualenv versions are putting lock files under ~/.local
+# as this script runs with sudo, HOME is actually the CI user's home
+chmod -R o+rwX "${HOME}/.local/share/virtualenv"
diff --git a/st2common/st2common/config.py b/st2common/st2common/config.py
@@ -393,8 +393,13 @@ def register_opts(ignore_errors=False):
 
     # Runner options
     default_python_bin_path = sys.executable
-    base_dir = os.path.dirname(os.path.realpath(default_python_bin_path))
+    # If the virtualenv uses a symlinked python, then try using virtualenv from that venv
+    # first before looking for virtualenv installed in python's system-site-packages.
+    base_dir = os.path.dirname(default_python_bin_path)
     default_virtualenv_bin_path = os.path.join(base_dir, "virtualenv")
+    if not os.path.exists(default_virtualenv_bin_path):
+        base_dir = os.path.dirname(os.path.realpath(default_python_bin_path))
+        default_virtualenv_bin_path = os.path.join(base_dir, "virtualenv")
 
     action_runner_opts = [
         # Common runner options

diff --git a/st2common/st2common/util/shell.py b/st2common/st2common/util/shell.py
@@ -47,6 +47,7 @@ def run_command(
     shell=False,
     cwd=None,
     env=None,
+    close_fds=None,
 ):
     """
     Run the provided command in a subprocess and wait until it completes.
@@ -73,6 +74,9 @@ def run_command(
                 environment from the current process is inherited.
     :type env: ``dict``
 
+    :param close_fds: True to close all the fds. By default when None is provided we rely on
+                      default upstream behavior which may be Python version specific.
+
     :rtype: ``tuple`` (exit_code, stdout, stderr)
     """
     if not isinstance(cmd, (list, tuple) + six.string_types):
@@ -83,6 +87,10 @@ def run_command(
     if not env:
         env = os.environ.copy()
 
+    kwargs = {}
+    if close_fds is not None:
+        kwargs["close_fds"] = close_fds
+
     process = concurrency.subprocess_popen(
         args=cmd,
         stdin=stdin,
@@ -91,6 +99,7 @@ def run_command(
         env=env,
         cwd=cwd,
         shell=shell,
+        **kwargs,
     )
     stdout, stderr = process.communicate()
     exit_code = process.returncode

diff --git a/st2common/st2common/util/virtualenvs.py b/st2common/st2common/util/virtualenvs.py
@@ -284,6 +284,10 @@ def install_requirements(
     )
     exit_code, stdout, stderr = run_command(cmd=cmd, env=env)
 
+    # Normally we don't want this, even in debug logs. But it is useful to
+    # investigate pip behavior changes & broken virtualenv integration tests.
+    # logger.debug(f"\npip stdout=\n{stdout}")
+
     if exit_code != 0:
         stdout = to_ascii(stdout)
         stderr = to_ascii(stderr)
@@ -330,6 +334,10 @@ def install_requirement(virtualenv_path, requirement, proxy_config=None, logger=
     )
     exit_code, stdout, stderr = run_command(cmd=cmd, env=env)
 
+    # Normally we don't want this, even in debug logs. But it is useful to
+    # investigate pip behavior changes & broken virtualenv integration tests.
+    # logger.debug(f"\npip stdout=\n{stdout}")
+
     if exit_code != 0:
         raise Exception(
             'Failed to install requirement "%s": %s' % (requirement, stdout)