Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better flag documentation. #28753

Merged
merged 2 commits into from
Oct 2, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 7 additions & 13 deletions sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,6 @@ def __init__(self, cache=None):
def is_fnapi_compatible(self):
return False

def apply(self, transform, input, options):
_check_and_add_missing_options(options)
return super().apply(transform, input, options)

@staticmethod
def poll_for_job_completion(
runner, result, duration, state_update_callback=None):
Expand Down Expand Up @@ -496,10 +492,6 @@ def _get_coder(typehint, window_coder):
coders.registry.get_coder(typehint), window_coder=window_coder)
return coders.registry.get_coder(typehint)

# TODO(srohde): Remove this after internal usages have been removed.
def apply_GroupByKey(self, transform, pcoll, options):
return transform.expand(pcoll)

def _verify_gbk_coders(self, transform, pcoll):
# Infer coder of parent.
#
Expand Down Expand Up @@ -589,12 +581,14 @@ def _check_and_add_missing_options(options):
sdk_location = options.view_as(SetupOptions).sdk_location
if 'dev' in beam.version.__version__ and sdk_location == 'default':
raise ValueError(
"When launching Dataflow Jobs with an unreleased SDK, "
"You are submitting a pipeline with Apache Beam Python SDK "
f"{beam.version.__version__}. "
"When launching Dataflow jobs with an unreleased (dev) SDK, "
"please provide an SDK distribution in the --sdk_location option "
"to use consistent SDK version at "
"pipeline submission and runtime. To ignore this error and use the "
"SDK installed in Dataflow dev containers, use "
"--sdk_location=container.")
"to use a consistent SDK version at "
"pipeline submission and runtime. To ignore this error and use "
"an SDK preinstalled in the default Dataflow dev runtime environment "
"or in a custom container image, use --sdk_location=container.")

# Streaming only supports using runner v2 (aka unified worker).
# Runner v2 only supports using streaming engine (aka windmill service)
Expand Down
Loading