diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 9fa6e9973291d..75866c6f6013a 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -44,6 +44,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas.core.arrays import ExtensionArray from pandas.core.base import NoNewAttributesMixin from pandas.core.construction import extract_array @@ -456,7 +457,7 @@ def _get_series_list(self, others): # in case of list-like `others`, all elements must be # either Series/Index/np.ndarray (1-dim)... if all( - isinstance(x, (ABCSeries, ABCIndex)) + isinstance(x, (ABCSeries, ABCIndex, ExtensionArray)) or (isinstance(x, np.ndarray) and x.ndim == 1) for x in others ): @@ -690,12 +691,15 @@ def cat( out: Index | Series if isinstance(self._orig, ABCIndex): # add dtype for case that result is all-NA + dtype = None + if isna(result).all(): + dtype = object - out = Index(result, dtype=object, name=self._orig.name) + out = Index(result, dtype=dtype, name=self._orig.name) else: # Series if isinstance(self._orig.dtype, CategoricalDtype): # We need to infer the new categories. 
- dtype = None + dtype = self._orig.dtype.categories.dtype # type: ignore[assignment] else: dtype = self._orig.dtype res_ser = Series( @@ -914,7 +918,13 @@ def split( if is_re(pat): regex = True result = self._data.array._str_split(pat, n, expand, regex) - return self._wrap_result(result, returns_string=expand, expand=expand) + if self._data.dtype == "category": + dtype = self._data.dtype.categories.dtype + else: + dtype = object if self._data.dtype == object else None + return self._wrap_result( + result, expand=expand, returns_string=expand, dtype=dtype + ) @Appender( _shared_docs["str_split"] @@ -932,7 +942,10 @@ def split( @forbid_nonstring_types(["bytes"]) def rsplit(self, pat=None, *, n=-1, expand: bool = False): result = self._data.array._str_rsplit(pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) + dtype = object if self._data.dtype == object else None + return self._wrap_result( + result, expand=expand, returns_string=expand, dtype=dtype + ) _shared_docs[ "str_partition" @@ -1028,7 +1041,13 @@ def rsplit(self, pat=None, *, n=-1, expand: bool = False): @forbid_nonstring_types(["bytes"]) def partition(self, sep: str = " ", expand: bool = True): result = self._data.array._str_partition(sep, expand) - return self._wrap_result(result, expand=expand, returns_string=expand) + if self._data.dtype == "category": + dtype = self._data.dtype.categories.dtype + else: + dtype = object if self._data.dtype == object else None + return self._wrap_result( + result, expand=expand, returns_string=expand, dtype=dtype + ) @Appender( _shared_docs["str_partition"] @@ -1042,7 +1061,13 @@ def partition(self, sep: str = " ", expand: bool = True): @forbid_nonstring_types(["bytes"]) def rpartition(self, sep: str = " ", expand: bool = True): result = self._data.array._str_rpartition(sep, expand) - return self._wrap_result(result, expand=expand, returns_string=expand) + if self._data.dtype == "category": + dtype = self._data.dtype.categories.dtype + 
else: + dtype = object if self._data.dtype == object else None + return self._wrap_result( + result, expand=expand, returns_string=expand, dtype=dtype + ) def get(self, i): """ @@ -2748,7 +2773,7 @@ def extract( else: name = _get_single_group_name(regex) result = self._data.array._str_extract(pat, flags=flags, expand=returns_df) - return self._wrap_result(result, name=name) + return self._wrap_result(result, name=name, dtype=result_dtype) @forbid_nonstring_types(["bytes"]) def extractall(self, pat, flags: int = 0) -> DataFrame: @@ -3488,7 +3513,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame: raise ValueError("pattern contains no capture groups") if isinstance(arr, ABCIndex): - arr = arr.to_series().reset_index(drop=True) + arr = arr.to_series().reset_index(drop=True).astype(arr.dtype) columns = _get_group_names(regex) match_list = [] diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index 2914b22a52e94..31e005466af7b 100644 --- a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -2,11 +2,13 @@ import pytest from pandas import ( + CategoricalDtype, DataFrame, Index, MultiIndex, Series, _testing as tm, + option_context, ) from pandas.core.strings.accessor import StringMethods @@ -162,7 +164,8 @@ def test_api_per_method( if inferred_dtype in allowed_types: # xref GH 23555, GH 23556 - method(*args, **kwargs) # works! + with option_context("future.no_silent_downcasting", True): + method(*args, **kwargs) # works! 
else: # GH 23011, GH 23163 msg = ( @@ -178,6 +181,7 @@ def test_api_for_categorical(any_string_method, any_string_dtype): s = Series(list("aabb"), dtype=any_string_dtype) s = s + " " + s c = s.astype("category") + c = c.astype(CategoricalDtype(c.dtype.categories.astype("object"))) assert isinstance(c.str, StringMethods) method_name, args, kwargs = any_string_method diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py index 1dee25e631648..41aedae90ca76 100644 --- a/pandas/tests/strings/test_case_justify.py +++ b/pandas/tests/strings/test_case_justify.py @@ -21,7 +21,8 @@ def test_title_mixed_object(): s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0]) result = s.str.title() expected = Series( - ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan] + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan], + dtype=object, ) tm.assert_almost_equal(result, expected) @@ -41,11 +42,15 @@ def test_lower_upper_mixed_object(): s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) result = s.str.upper() - expected = Series(["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan]) + expected = Series( + ["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan], dtype=object + ) tm.assert_series_equal(result, expected) result = s.str.lower() - expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan]) + expected = Series( + ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object + ) tm.assert_series_equal(result, expected) @@ -71,7 +76,8 @@ def test_capitalize_mixed_object(): s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0]) result = s.str.capitalize() expected = Series( - ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan] + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, 
expected) @@ -87,7 +93,8 @@ def test_swapcase_mixed_object(): s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0]) result = s.str.swapcase() expected = Series( - ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan] + ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -138,19 +145,22 @@ def test_pad_mixed_object(): result = s.str.pad(5, side="left") expected = Series( - [" a", np.nan, " b", np.nan, np.nan, " ee", None, np.nan, np.nan] + [" a", np.nan, " b", np.nan, np.nan, " ee", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) result = s.str.pad(5, side="right") expected = Series( - ["a ", np.nan, "b ", np.nan, np.nan, "ee ", None, np.nan, np.nan] + ["a ", np.nan, "b ", np.nan, np.nan, "ee ", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) result = s.str.pad(5, side="both") expected = Series( - [" a ", np.nan, " b ", np.nan, np.nan, " ee ", None, np.nan, np.nan] + [" a ", np.nan, " b ", np.nan, np.nan, " ee ", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -238,7 +248,8 @@ def test_center_ljust_rjust_mixed_object(): None, np.nan, np.nan, - ] + ], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -255,7 +266,8 @@ def test_center_ljust_rjust_mixed_object(): None, np.nan, np.nan, - ] + ], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -272,7 +284,8 @@ def test_center_ljust_rjust_mixed_object(): None, np.nan, np.nan, - ] + ], + dtype=object, ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index 3e620b7664335..284932491a65e 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( DataFrame, Index, @@ 
-10,6 +12,7 @@ Series, _testing as tm, concat, + option_context, ) @@ -26,45 +29,49 @@ def test_str_cat_name(index_or_series, other): assert result.name == "name" -def test_str_cat(index_or_series): - box = index_or_series - # test_cat above tests "str_cat" from ndarray; - # here testing "str.cat" from Series/Index to ndarray/list - s = box(["a", "a", "b", "b", "c", np.nan]) +@pytest.mark.parametrize( + "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] +) +def test_str_cat(index_or_series, infer_string): + with option_context("future.infer_string", infer_string): + box = index_or_series + # test_cat above tests "str_cat" from ndarray; + # here testing "str.cat" from Series/Index to ndarray/list + s = box(["a", "a", "b", "b", "c", np.nan]) - # single array - result = s.str.cat() - expected = "aabbc" - assert result == expected + # single array + result = s.str.cat() + expected = "aabbc" + assert result == expected - result = s.str.cat(na_rep="-") - expected = "aabbc-" - assert result == expected + result = s.str.cat(na_rep="-") + expected = "aabbc-" + assert result == expected - result = s.str.cat(sep="_", na_rep="NA") - expected = "a_a_b_b_c_NA" - assert result == expected + result = s.str.cat(sep="_", na_rep="NA") + expected = "a_a_b_b_c_NA" + assert result == expected - t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object) - expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"]) + t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object) + expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"]) - # Series/Index with array - result = s.str.cat(t, na_rep="-") - tm.assert_equal(result, expected) + # Series/Index with array + result = s.str.cat(t, na_rep="-") + tm.assert_equal(result, expected) - # Series/Index with list - result = s.str.cat(list(t), na_rep="-") - tm.assert_equal(result, expected) + # Series/Index with list + result = s.str.cat(list(t), na_rep="-") + tm.assert_equal(result, expected) - # errors for 
incorrect lengths - rgx = r"If `others` contains arrays or lists \(or other list-likes.*" - z = Series(["1", "2", "3"]) + # errors for incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]) - with pytest.raises(ValueError, match=rgx): - s.str.cat(z.values) + with pytest.raises(ValueError, match=rgx): + s.str.cat(z.values) - with pytest.raises(ValueError, match=rgx): - s.str.cat(list(z)) + with pytest.raises(ValueError, match=rgx): + s.str.cat(list(z)) def test_str_cat_raises_intuitive_error(index_or_series): @@ -78,39 +85,54 @@ def test_str_cat_raises_intuitive_error(index_or_series): s.str.cat(" ") +@pytest.mark.parametrize( + "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] +) @pytest.mark.parametrize("sep", ["", None]) @pytest.mark.parametrize("dtype_target", ["object", "category"]) @pytest.mark.parametrize("dtype_caller", ["object", "category"]) -def test_str_cat_categorical(index_or_series, dtype_caller, dtype_target, sep): +def test_str_cat_categorical( + index_or_series, dtype_caller, dtype_target, sep, infer_string +): box = index_or_series - s = Index(["a", "a", "b", "a"], dtype=dtype_caller) - s = s if box == Index else Series(s, index=s) - t = Index(["b", "a", "b", "c"], dtype=dtype_target) - - expected = Index(["ab", "aa", "bb", "ac"]) - expected = expected if box == Index else Series(expected, index=s) + with option_context("future.infer_string", infer_string): + s = Index(["a", "a", "b", "a"], dtype=dtype_caller) + s = s if box == Index else Series(s, index=s) + t = Index(["b", "a", "b", "c"], dtype=dtype_target) - # Series/Index with unaligned Index -> t.values - result = s.str.cat(t.values, sep=sep) - tm.assert_equal(result, expected) - - # Series/Index with Series having matching Index - t = Series(t.values, index=s) - result = s.str.cat(t, sep=sep) - tm.assert_equal(result, expected) - - # Series/Index with Series.values - result = s.str.cat(t.values, 
sep=sep) - tm.assert_equal(result, expected) + expected = Index(["ab", "aa", "bb", "ac"]) + expected = ( + expected + if box == Index + else Series(expected, index=Index(s, dtype=dtype_caller)) + ) - # Series/Index with Series having different Index - t = Series(t.values, index=t.values) - expected = Index(["aa", "aa", "bb", "bb", "aa"]) - expected = expected if box == Index else Series(expected, index=expected.str[:1]) + # Series/Index with unaligned Index -> t.values + result = s.str.cat(t.values, sep=sep) + tm.assert_equal(result, expected) + + # Series/Index with Series having matching Index + t = Series(t.values, index=Index(s, dtype=dtype_caller)) + result = s.str.cat(t, sep=sep) + tm.assert_equal(result, expected) + + # Series/Index with Series.values + result = s.str.cat(t.values, sep=sep) + tm.assert_equal(result, expected) + + # Series/Index with Series having different Index + t = Series(t.values, index=t.values) + expected = Index(["aa", "aa", "bb", "bb", "aa"]) + dtype = object if dtype_caller == "object" else s.dtype.categories.dtype + expected = ( + expected + if box == Index + else Series(expected, index=Index(expected.str[:1], dtype=dtype)) + ) - result = s.str.cat(t, sep=sep) - tm.assert_equal(result, expected) + result = s.str.cat(t, sep=sep) + tm.assert_equal(result, expected) @pytest.mark.parametrize( @@ -321,8 +343,9 @@ def test_str_cat_all_na(index_or_series, index_or_series2): # all-NA target if box == Series: - expected = Series([np.nan] * 4, index=s.index, dtype=object) + expected = Series([np.nan] * 4, index=s.index, dtype=s.dtype) else: # box == Index + # TODO: String option, this should return string dtype expected = Index([np.nan] * 4, dtype=object) result = s.str.cat(t, join="left") tm.assert_equal(result, expected) diff --git a/pandas/tests/strings/test_extract.py b/pandas/tests/strings/test_extract.py index 9ad9b1eca41d9..77d008c650264 100644 --- a/pandas/tests/strings/test_extract.py +++ b/pandas/tests/strings/test_extract.py @@ 
-47,13 +47,16 @@ def test_extract_expand_False_mixed_object(): # two groups result = ser.str.extract(".*(BAD[_]+).*(BAD)", expand=False) er = [np.nan, np.nan] # empty row - expected = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er]) + expected = DataFrame( + [["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er], dtype=object + ) tm.assert_frame_equal(result, expected) # single group result = ser.str.extract(".*(BAD[_]+).*BAD", expand=False) expected = Series( - ["BAD_", np.nan, "BAD_", np.nan, np.nan, np.nan, None, np.nan, np.nan] + ["BAD_", np.nan, "BAD_", np.nan, np.nan, np.nan, None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -238,7 +241,9 @@ def test_extract_expand_True_mixed_object(): ) result = mixed.str.extract(".*(BAD[_]+).*(BAD)", expand=True) - expected = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er]) + expected = DataFrame( + [["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er], dtype=object + ) tm.assert_frame_equal(result, expected) @@ -603,8 +608,8 @@ def test_extractall_stringindex(any_string_dtype): # index.name doesn't affect to the result if any_string_dtype == "object": for idx in [ - Index(["a1a2", "b1", "c1"]), - Index(["a1a2", "b1", "c1"], name="xxx"), + Index(["a1a2", "b1", "c1"], dtype=object), + Index(["a1a2", "b1", "c1"], name="xxx", dtype=object), ]: result = idx.str.extractall(r"[ab](?P<digit>\d)") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index bd64a5dce3b9a..3f58c6d703f8f 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -242,7 +242,7 @@ def test_contains_nan(any_string_dtype): @pytest.mark.parametrize("pat", ["foo", ("foo", "baz")]) -@pytest.mark.parametrize("dtype", [None, "category"]) +@pytest.mark.parametrize("dtype", ["object", "category"]) @pytest.mark.parametrize("null_value", 
[None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) def test_startswith(pat, dtype, null_value, na): @@ -254,10 +254,10 @@ def test_startswith(pat, dtype, null_value, na): result = values.str.startswith(pat) exp = Series([False, np.nan, True, False, False, np.nan, True]) - if dtype is None and null_value is pd.NA: + if dtype == "object" and null_value is pd.NA: # GH#18463 exp = exp.fillna(null_value) - elif dtype is None and null_value is None: + elif dtype == "object" and null_value is None: exp[exp.isna()] = None tm.assert_series_equal(result, exp) @@ -300,7 +300,7 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na): @pytest.mark.parametrize("pat", ["foo", ("foo", "baz")]) -@pytest.mark.parametrize("dtype", [None, "category"]) +@pytest.mark.parametrize("dtype", ["object", "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) def test_endswith(pat, dtype, null_value, na): @@ -312,10 +312,10 @@ def test_endswith(pat, dtype, null_value, na): result = values.str.endswith(pat) exp = Series([False, np.nan, False, False, True, np.nan, True]) - if dtype is None and null_value is pd.NA: + if dtype == "object" and null_value is pd.NA: # GH#18463 - exp = exp.fillna(pd.NA) - elif dtype is None and null_value is None: + exp = exp.fillna(null_value) + elif dtype == "object" and null_value is None: exp[exp.isna()] = None tm.assert_series_equal(result, exp) @@ -382,7 +382,9 @@ def test_replace_mixed_object(): ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] ) result = Series(ser).str.replace("BAD[_]*", "", regex=True) - expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan]) + expected = Series( + ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object + ) tm.assert_series_equal(result, expected) @@ -469,7 +471,9 @@ def test_replace_compiled_regex_mixed_object(): ["aBAD", np.nan, "bBAD", True, 
datetime.today(), "fooBAD", None, 1, 2.0] ) result = Series(ser).str.replace(pat, "", regex=True) - expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan]) + expected = Series( + ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object + ) tm.assert_series_equal(result, expected) @@ -913,7 +917,7 @@ def test_translate_mixed_object(): # Series with non-string values s = Series(["a", "b", "c", 1.2]) table = str.maketrans("abc", "cde") - expected = Series(["c", "d", "e", np.nan]) + expected = Series(["c", "d", "e", np.nan], dtype=object) result = s.str.translate(table) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py index 0a7d409773dd6..9ff1fc0e13ae9 100644 --- a/pandas/tests/strings/test_split_partition.py +++ b/pandas/tests/strings/test_split_partition.py @@ -681,14 +681,16 @@ def test_partition_sep_kwarg(any_string_dtype, method): def test_get(): ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) result = ser.str.split("_").str.get(1) - expected = Series(["b", "d", np.nan, "g"]) + expected = Series(["b", "d", np.nan, "g"], dtype=object) tm.assert_series_equal(result, expected) def test_get_mixed_object(): ser = Series(["a_b_c", np.nan, "c_d_e", True, datetime.today(), None, 1, 2.0]) result = ser.str.split("_").str.get(1) - expected = Series(["b", np.nan, "d", np.nan, np.nan, None, np.nan, np.nan]) + expected = Series( + ["b", np.nan, "d", np.nan, np.nan, None, np.nan, np.nan], dtype=object + ) tm.assert_series_equal(result, expected) @@ -696,7 +698,7 @@ def test_get_mixed_object(): def test_get_bounds(idx): ser = Series(["1_2_3_4_5", "6_7_8_9_10", "11_12"]) result = ser.str.split("_").str.get(idx) - expected = Series(["3", "8", np.nan]) + expected = Series(["3", "8", np.nan], dtype=object) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_string_array.py 
b/pandas/tests/strings/test_string_array.py index a88dcc8956931..0b3f368afea5e 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -8,6 +8,7 @@ DataFrame, Series, _testing as tm, + option_context, ) @@ -56,7 +57,8 @@ def test_string_array(nullable_string_dtype, any_string_method): columns = expected.select_dtypes(include="object").columns assert all(result[columns].dtypes == nullable_string_dtype) result[columns] = result[columns].astype(object) - expected[columns] = expected[columns].fillna(NA) # GH#18463 + with option_context("future.no_silent_downcasting", True): + expected[columns] = expected[columns].fillna(NA) # GH#18463 tm.assert_equal(result, expected) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 4315835b70a40..f662dfd7e2b14 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -76,7 +76,8 @@ def test_repeat_mixed_object(): ser = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) result = ser.str.repeat(3) expected = Series( - ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", None, np.nan, np.nan] + ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -270,7 +271,8 @@ def test_spilt_join_roundtrip_mixed_object(): ) result = ser.str.split("_").str.join("_") expected = Series( - ["a_b", np.nan, "asdf_cas_asdf", np.nan, np.nan, "foo", None, np.nan, np.nan] + ["a_b", np.nan, "asdf_cas_asdf", np.nan, np.nan, "foo", None, np.nan, np.nan], + dtype=object, ) tm.assert_series_equal(result, expected) @@ -398,7 +400,7 @@ def test_slice(start, stop, step, expected, any_string_dtype): def test_slice_mixed_object(start, stop, step, expected): ser = Series(["aafootwo", np.nan, "aabartwo", True, datetime.today(), None, 1, 2.0]) result = ser.str.slice(start, stop, step) - expected = Series(expected) + expected = 
Series(expected, dtype=object) tm.assert_series_equal(result, expected) @@ -453,7 +455,7 @@ def test_strip_lstrip_rstrip_mixed_object(method, exp): ser = Series([" aa ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0]) result = getattr(ser.str, method)() - expected = Series(exp + [np.nan, np.nan, None, np.nan, np.nan]) + expected = Series(exp + [np.nan, np.nan, None, np.nan, np.nan], dtype=object) tm.assert_series_equal(result, expected) @@ -529,7 +531,7 @@ def test_string_slice_out_of_bounds(any_string_dtype): def test_encode_decode(any_string_dtype): ser = Series(["a", "b", "a\xe4"], dtype=any_string_dtype).str.encode("utf-8") result = ser.str.decode("utf-8") - expected = ser.map(lambda x: x.decode("utf-8")) + expected = ser.map(lambda x: x.decode("utf-8")).astype(object) tm.assert_series_equal(result, expected) @@ -559,7 +561,7 @@ def test_decode_errors_kwarg(): ser.str.decode("cp1252") result = ser.str.decode("cp1252", "ignore") - expected = ser.map(lambda x: x.decode("cp1252", "ignore")) + expected = ser.map(lambda x: x.decode("cp1252", "ignore")).astype(object) tm.assert_series_equal(result, expected) @@ -672,7 +674,7 @@ def test_str_accessor_in_apply_func(): def test_zfill(): # https://github.com/pandas-dev/pandas/issues/20868 value = Series(["-1", "1", "1000", 10, np.nan]) - expected = Series(["-01", "001", "1000", np.nan, np.nan]) + expected = Series(["-01", "001", "1000", np.nan, np.nan], dtype=object) tm.assert_series_equal(value.str.zfill(3), expected) value = Series(["-2", "+5"]) @@ -704,10 +706,10 @@ def test_get_with_dict_label(): ] ) result = s.str.get("name") - expected = Series(["Hello", "Goodbye", None]) + expected = Series(["Hello", "Goodbye", None], dtype=object) tm.assert_series_equal(result, expected) result = s.str.get("value") - expected = Series(["World", "Planet", "Sea"]) + expected = Series(["World", "Planet", "Sea"], dtype=object) tm.assert_series_equal(result, expected)