diff --git a/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py index fb64c2702fd2..110b99b31b5f 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py +++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py @@ -33,7 +33,13 @@ startup, will install the tarball. Below is a complete command line for running the juliaset workflow remotely as -an example: +an example. + +First, install the juliaset package in the submission environment: + +pip install -e . + +Then, launch a Beam job: python juliaset_main.py \ --job_name juliaset-$USER \ @@ -52,7 +58,7 @@ import logging -from apache_beam.examples.complete.juliaset.juliaset import juliaset +from juliaset import juliaset if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) diff --git a/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml b/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml index 6c865974cbd2..436607838870 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml +++ b/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml @@ -27,7 +27,3 @@ description = "Julia set workflow package." dependencies = [ "numpy" ] - -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" diff --git a/sdks/python/apache_beam/examples/complete/juliaset/setup.py b/sdks/python/apache_beam/examples/complete/juliaset/setup.py index 649a5be7db75..d47cade92d72 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/setup.py +++ b/sdks/python/apache_beam/examples/complete/juliaset/setup.py @@ -14,105 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # +"""An optional setuptools configuration stub for the pipeline package. -"""setup.py module for the pipeline package. 
- -In this example, the pipeline code is gathered in a package that can be built -as source distribution and installed on the workers. The package is defined -in the pyproject.toml file. You can use setup.py file for defining -configuration that needs to be determined programatically, for example, -custom commands to run when a package is installed. - -You can install this package into the workers at runtime by using -the --setup_file pipeline option. +Use pyproject.toml to define the package. Add this file if you must +use the --setup_file pipeline option on Beam 2.59.0 or earlier, or +if your package requires some programmatic configuration at build or +installation time. """ -# pytype: skip-file - -import subprocess - import setuptools - -# It is recommended to import setuptools prior to importing distutils to avoid -# using legacy behavior from distutils. -# https://setuptools.readthedocs.io/en/latest/history.html#v48-0-0 -from distutils.command.build import build as _build # isort:skip - - -# This class handles the pip install mechanism. -class build(_build): # pylint: disable=invalid-name - """A build command class that will be invoked during package install. - - The package built using the current setup.py will be staged and later - installed in the worker using `pip install package'. This class will be - instantiated during install for this specific scenario and will trigger - running the custom commands specified. - """ - sub_commands = _build.sub_commands + [('CustomCommands', None)] - - -# Some custom command to run during setup. The command is not essential for this -# workflow. It is used here as an example. Each command will spawn a child -# process. Typically, these commands will include steps to install non-Python -# packages. 
For instance, to install a C++-based library libjpeg62 the following -# two commands will have to be added: -# -# ['apt-get', 'update'], -# ['apt-get', '--assume-yes', 'install', 'libjpeg62'], -# -# First, note that there is no need to use the sudo command because the setup -# script runs with appropriate access. -# Second, if apt-get tool is used then the first command needs to be 'apt-get -# update' so the tool refreshes itself and initializes links to download -# repositories. Without this initial step the other apt-get install commands -# will fail with package not found errors. Note also --assume-yes option which -# shortcuts the interactive confirmation. -# -# Note that in this example custom commands will run after installing required -# packages. If you have a PyPI package that depends on one of the custom -# commands, move installation of the dependent package to the list of custom -# commands, e.g.: -# -# ['pip', 'install', 'my_package'], -# -# TODO(https://github.com/apache/beam/issues/18568): Output from the custom -# commands are missing from the logs. The output of custom commands (including -# failures) will be logged in the worker-startup log. -CUSTOM_COMMANDS = [['echo', 'Custom command worked!']] - - -class CustomCommands(setuptools.Command): - """A setuptools Command class able to run arbitrary commands.""" - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def RunCustomCommand(self, command_list): - print('Running command: %s' % command_list) - p = subprocess.Popen( - command_list, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - # Can use communicate(input='y\n'.encode()) if the command run requires - # some confirmation. 
- stdout_data, _ = p.communicate() - print('Command output: %s' % stdout_data) - if p.returncode != 0: - raise RuntimeError( - 'Command %s failed: exit code: %s' % (command_list, p.returncode)) - - def run(self): - for command in CUSTOM_COMMANDS: - self.RunCustomCommand(command) - - -setuptools.setup( - packages=setuptools.find_packages(), - cmdclass={ - # Command class instantiated and run during pip install scenarios. - 'build': build, - 'CustomCommands': CustomCommands, - }) +setuptools.setup() diff --git a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py b/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py index 0aac9f9cffb9..de4f8e9408c2 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py +++ b/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py @@ -48,7 +48,7 @@ def test_run_example_with_setup_file(self): 'coordinate_output': coordinate_output, 'grid_size': self.GRID_SIZE, 'setup_file': os.path.normpath( - os.path.join(os.path.dirname(__file__), '..', 'setup.py')), + os.path.join(os.path.dirname(__file__), '../..', 'setup.py')), 'on_success_matcher': all_of(PipelineStateMatcher(PipelineState.DONE)), } args = pipeline.get_full_options_as_args(**extra_args) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 0f8457a40a7b..41be865f2354 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1392,14 +1392,16 @@ def _add_argparse_args(cls, parser): '--setup_file', default=None, help=( - 'Path to a setup Python file containing package dependencies. If ' - 'specified, the file\'s containing folder is assumed to have the ' - 'structure required for a setuptools setup package. The file must ' - 'be named setup.py. 
More details: ' - 'https://pythonhosted.org/an_example_pypi_project/setuptools.html ' - 'During job submission a source distribution will be built and ' - 'the worker will install the resulting package before running any ' - 'custom code.')) + 'Path to a file that defines a Python package that needs to be ' + 'installed on Beam workers. ' + 'If specified, the file\'s containing folder must have the ' + 'structure required for a Python package. The file must ' + 'be named setup.py or pyproject.toml. ' + 'For information on distributing Python packages, see: ' + 'https://packaging.python.org/ ' + 'During job submission a source distribution of the package will ' + 'be built and workers will install the resulting package before ' + 'running the pipeline.')) parser.add_argument( '--beam_plugin', '--beam_plugins', diff --git a/sdks/python/apache_beam/runners/portability/stager.py b/sdks/python/apache_beam/runners/portability/stager.py index 98c0e3176f75..abe60d37a63e 100644 --- a/sdks/python/apache_beam/runners/portability/stager.py +++ b/sdks/python/apache_beam/runners/portability/stager.py @@ -280,10 +280,11 @@ def create_job_resources(options, # type: PipelineOptions raise RuntimeError( 'The file %s cannot be found. It was specified in the ' '--setup_file command line option.' 
% setup_options.setup_file) - if os.path.basename(setup_options.setup_file) != 'setup.py': + if (os.path.basename(setup_options.setup_file) != 'setup.py' and + os.path.basename(setup_options.setup_file) != 'pyproject.toml'): raise RuntimeError( 'The --setup_file option expects the full path to a file named ' - 'setup.py instead of %s' % setup_options.setup_file) + 'setup.py or pyproject.toml, got: %s' % setup_options.setup_file) tarball_file = Stager._build_setup_package( setup_options.setup_file, temp_dir, build_setup_args) resources.append( diff --git a/sdks/python/test-suites/portable/common.gradle b/sdks/python/test-suites/portable/common.gradle index 4f232c5b104f..f578319e5162 100644 --- a/sdks/python/test-suites/portable/common.gradle +++ b/sdks/python/test-suites/portable/common.gradle @@ -92,10 +92,11 @@ tasks.register("portableLocalRunnerJuliaSetWithSetupPy") { && cd ${pythonRootDir} \\ && pip install -e . \\ && cd apache_beam/examples/complete/juliaset \\ + && pip install -e . \\ && python juliaset_main.py \\ --runner=PortableRunner \\ --job_endpoint=embed \\ - --setup_file=./setup.py \\ + --setup_file=./pyproject.toml \\ --coordinate_output=/tmp/juliaset \\ --grid_size=1 """ diff --git a/setup.py b/setup.py new file mode 100644 index 000000000000..357f45596736 --- /dev/null +++ b/setup.py @@ -0,0 +1,8 @@ +"""An optional setuptools configuration stub for the pipeline package. + +Use pyproject.toml to define the package. Add this file if you must +use the --setup_file pipeline option on Beam 2.59.0 or earlier. 
+""" + + import setuptools +setuptools.setup() diff --git a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md index 2a2a515ec09b..e13551658b3f 100644 --- a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md @@ -108,7 +108,7 @@ Often, your pipeline code spans multiple files. To run your project remotely, yo # List Python packages your pipeline depends on. ] -2. If your package requires if some programmatic configuration, or you need to use the `--setup_file` pipeline option, create a setup.py file for your project. +2. If your package requires some programmatic configuration, or you need to use the `--setup_file` pipeline option on Beam SDK 2.59.0 or earlier, create a setup.py file for your project. # Note that the package can be completely defined by pyproject.toml. # This file is optional. @@ -137,7 +137,8 @@ Often, your pipeline code spans multiple files. To run your project remotely, yo 6. Run your pipeline with the following command-line option: - --setup_file /path/to/setup.py + --setup_file /path/to/setup.py OR + --setup_file /path/to/pyproject.toml (requires Beam SDK 2.60.0 or later). **Note:** It is not necessary to supply the `--requirements_file` [option](#pypi-dependencies) if the dependencies of your package are defined in the `dependencies` field of the `pyproject.toml` file (see step 1). However unlike with the `--requirements_file` option, when you use the `--setup_file` option, Beam doesn't stage the dependent packages to the runner. @@ -147,19 +148,6 @@ Only the pipeline package is staged. 
If they aren't already provided in the runt ## Non-Python Dependencies or PyPI Dependencies with Non-Python Dependencies {#nonpython} If your pipeline uses non-Python packages, such as packages that require installation using the `apt install` command, or uses a PyPI package that depends on non-Python dependencies during package installation, we recommend installing them using a [custom container](#custom-containers). -Otherwise, you must perform the following steps. - -1. [Structure your pipeline as a package](#multiple-file-dependencies). - -2. Add the required installation commands for the non-Python dependencies, such as the `apt install` commands, to the list of `CUSTOM_COMMANDS` in your `setup.py` file. See the [Juliaset setup.py file](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/complete/juliaset/setup.py) for an example. - - **Note:** You must verify that these commands run on the remote worker. For example, if you use `apt`, the remote worker needs `apt` support. - -3. Run your pipeline with the following command-line option: - - --setup_file /path/to/setup.py - -**Note:** Because custom commands execute after the dependencies for your workflow are installed (by `pip`), you should omit the PyPI package dependency from the pipeline's `requirements.txt` file and from the `install_requires` parameter in the `setuptools.setup()` call of your `setup.py` file. ## Pre-building SDK Container Image