From 9358eb557fad5979b10669ccc2cc2db0103ca6cf Mon Sep 17 00:00:00 2001 From: Chris Neffshade Date: Wed, 13 Sep 2023 10:51:21 +0000 Subject: [PATCH] Replace deprecated call to StringMethods In Pandas 2, pd.Series.str is the right way and still works with Pandas 1 --- sdks/python/apache_beam/dataframe/frames.py | 34 +++++++++---------- .../apache_beam/dataframe/frames_test.py | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py index 0d9b22ae3f9e..756e75b9583c 100644 --- a/sdks/python/apache_beam/dataframe/frames.py +++ b/sdks/python/apache_beam/dataframe/frames.py @@ -4907,9 +4907,9 @@ def __setitem__(self, index, value): class _DeferredStringMethods(frame_base.DeferredBase): - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def cat(self, others, join, **kwargs): """If defined, ``others`` must be a :class:`DeferredSeries` or a ``list`` of ``DeferredSeries``.""" @@ -4949,8 +4949,8 @@ def func(*args): requires_partition_by=requires, preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def repeat(self, repeats): """``repeats`` must be an ``int`` or a :class:`DeferredSeries`. Lists are not supported because they make this operation order-sensitive.""" @@ -4987,8 +4987,8 @@ def repeat(self, repeats): raise TypeError("str.repeat(repeats=) value must be an int or a " f"DeferredSeries (encountered {type(repeats)}).") - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def get_dummies(self, **kwargs): """ Series must be categorical dtype. Please cast to ``CategoricalDtype`` @@ -5070,9 +5070,9 @@ def func(s): requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def split(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5081,9 +5081,9 @@ def split(self, **kwargs): """ return self._split_helper(rsplit=False, **kwargs) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def rsplit(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5161,17 +5161,17 @@ def func(df, *args, **kwargs): return func for method in ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, method, frame_base._elementwise_method(make_str_func(method), name=method, - base=pd.core.strings.StringMethods)) + base=pd.Series.str)) for method in NON_ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, @@ -5179,7 +5179,7 @@ def func(df, *args, **kwargs): frame_base._proxy_method( make_str_func(method), name=method, - base=pd.core.strings.StringMethods, + base=pd.Series.str, requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Singleton())) diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index fa121aa85c30..d2d2b7339c10 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -2936,7 +2936,7 @@ class DocstringTest(unittest.TestCase): (frames.DeferredDataFrame, pd.DataFrame), (frames.DeferredSeries, pd.Series), #(frames._DeferredIndex, pd.Index), - (frames._DeferredStringMethods, pd.core.strings.StringMethods), + (frames._DeferredStringMethods, pd.Series.str), ( frames._DeferredCategoricalMethods, pd.core.arrays.categorical.CategoricalAccessor),