From 932744801e67c7d03560114a78881be62aadfa19 Mon Sep 17 00:00:00 2001 From: caneff Date: Wed, 20 Sep 2023 11:28:22 -0400 Subject: [PATCH] Change pd.core.strings.StringMethods for Pandas 2 compatibility. (#28455) --- sdks/python/apache_beam/dataframe/frames.py | 34 +++++++++---------- .../apache_beam/dataframe/frames_test.py | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py index 80af501cfacb..a74ccbba041a 100644 --- a/sdks/python/apache_beam/dataframe/frames.py +++ b/sdks/python/apache_beam/dataframe/frames.py @@ -4931,9 +4931,9 @@ def __setitem__(self, index, value): class _DeferredStringMethods(frame_base.DeferredBase): - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def cat(self, others, join, **kwargs): """If defined, ``others`` must be a :class:`DeferredSeries` or a ``list`` of ``DeferredSeries``.""" @@ -4973,8 +4973,8 @@ def func(*args): requires_partition_by=requires, preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def repeat(self, repeats): """``repeats`` must be an ``int`` or a :class:`DeferredSeries`. 
Lists are not supported because they make this operation order-sensitive.""" @@ -5011,8 +5011,8 @@ def repeat(self, repeats): raise TypeError("str.repeat(repeats=) value must be an int or a " f"DeferredSeries (encountered {type(repeats)}).") - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def get_dummies(self, **kwargs): """ Series must be categorical dtype. Please cast to ``CategoricalDtype`` @@ -5094,9 +5094,9 @@ def func(s): requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def split(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5105,9 +5105,9 @@ def split(self, **kwargs): """ return self._split_helper(rsplit=False, **kwargs) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def rsplit(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. 
@@ -5185,17 +5185,17 @@ def func(df, *args, **kwargs): return func for method in ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, method, frame_base._elementwise_method(make_str_func(method), name=method, - base=pd.core.strings.StringMethods)) + base=pd.Series.str)) for method in NON_ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, @@ -5203,7 +5203,7 @@ def func(df, *args, **kwargs): frame_base._proxy_method( make_str_func(method), name=method, - base=pd.core.strings.StringMethods, + base=pd.Series.str, requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Singleton())) diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index 257d77e0a6b3..4998683461b9 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -2986,7 +2986,7 @@ class DocstringTest(unittest.TestCase): (frames.DeferredDataFrame, pd.DataFrame), (frames.DeferredSeries, pd.Series), #(frames._DeferredIndex, pd.Index), - (frames._DeferredStringMethods, pd.core.strings.StringMethods), + (frames._DeferredStringMethods, pd.Series.str), ( frames._DeferredCategoricalMethods, pd.core.arrays.categorical.CategoricalAccessor),