Skip to content

Commit 9bcb6a8

Browse files
committed
update
1 parent 44abe87 commit 9bcb6a8

File tree

9 files changed

+33
-27
lines changed

9 files changed

+33
-27
lines changed

doc/source/whatsnew/v1.1.0.rst

-1
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,6 @@ Other enhancements
236236
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
237237
- :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the objects inside are unsortable. Pass `sort=False` to suppress this warning (:issue:`33015`)
238238
- :class:`Series.dt` and :class:`DatetimeIndex` now have an `isocalendar` method that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`).
239-
- :meth:`Series.combine` has gained a ``dtype`` argument. If supplied, the combined series will get that dtype (:issue:`33465`)
240239
- The :meth:`DataFrame.to_feather` method now supports additional keyword
241240
arguments (e.g. to set the compression) that are added in pyarrow 0.17
242241
(:issue:`33422`).

pandas/core/arrays/datetimelike.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
is_datetime64tz_dtype,
2828
is_datetime_or_timedelta_dtype,
2929
is_dtype_equal,
30+
is_extension_array_dtype,
3031
is_float_dtype,
3132
is_integer_dtype,
3233
is_list_like,
@@ -619,7 +620,11 @@ def astype(self, dtype, copy=True):
619620
if is_object_dtype(dtype):
620621
return self._box_values(self.asi8.ravel()).reshape(self.shape)
621622
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
622-
return self._format_native_types()
623+
if is_extension_array_dtype(dtype):
624+
arr_cls = dtype.construct_array_type()
625+
return arr_cls._from_sequence(self, dtype=dtype)
626+
else:
627+
return self._format_native_types()
623628
elif is_integer_dtype(dtype):
624629
# we deliberately ignore int32 vs. int64 here.
625630
# See https://github.com/pandas-dev/pandas/issues/24381 for more.

pandas/core/arrays/integer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numbers
2-
from typing import TYPE_CHECKING, List, Optional, Dict, Tuple, Type, Union
2+
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
33
import warnings
44

55
import numpy as np

pandas/core/arrays/period.py

-1
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,6 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
564564
actually format my specific types
565565
"""
566566
values = self.astype(object)
567-
568567
if date_format:
569568
formatter = lambda dt: dt.strftime(date_format)
570569
else:

pandas/core/arrays/string_.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,6 @@ class StringArray(PandasArray):
104104
105105
.. versionadded:: 1.0.0
106106
107-
.. versionchanged:: 1.1.0
108-
109-
``StringArray`` allows non-string input values, but will always convert the
110-
values to strings. (Before Pandas 1.1 non-string values were not allowed).
111-
112107
.. warning::
113108
114109
StringArray is considered experimental. The implementation and
@@ -157,9 +152,13 @@ class StringArray(PandasArray):
157152
['This is', 'some text', <NA>, 'data.']
158153
Length: 4, dtype: string
159154
160-
Like ``object`` dtype arrays instantiated with ``dtype="str"``, ``StringArray``
161-
allows non-string values but will always convert the values to strings.
155+
Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
156+
will convert the values to strings.
162157
158+
>>> pd.array(['1', 1], dtype="object")
159+
<PandasArray>
160+
['1', 1]
161+
Length: 2, dtype: object
163162
>>> pd.array(['1', 1], dtype="string")
164163
<StringArray>
165164
['1', '1']

pandas/core/series.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -2695,11 +2695,6 @@ def combine(self, other, func, fill_value=None, dtype=None) -> "Series":
26952695
The value to assume when an index is missing from
26962696
one Series or the other. The default specifies to use the
26972697
appropriate NaN value for the underlying dtype of the Series.
2698-
dtype : str, numpy.dtype, or ExtensionDtype, optional
2699-
Data type for the output Series. If not specified, this will be
2700-
inferred from the combined data.
2701-
2702-
.. versionadded:: 1.1.0
27032698
27042699
Returns
27052700
-------
@@ -2770,13 +2765,13 @@ def combine(self, other, func, fill_value=None, dtype=None) -> "Series":
27702765
new_values = [func(lv, other) for lv in self._values]
27712766
new_name = self.name
27722767

2773-
if dtype is not None:
2774-
return self._constructor(
2775-
new_values, index=new_index, name=new_name, dtype=dtype
2776-
)
27772768
if is_categorical_dtype(self.dtype):
27782769
pass
27792770
elif is_extension_array_dtype(self.dtype):
2771+
# Everything can be converted to strings, but we may not want to convert
2772+
if self.dtype == "string" and lib.infer_dtype(new_values) != "string":
2773+
return self._constructor(new_values, index=new_index, name=new_name)
2774+
27802775
# TODO: can we do this for only SparseDtype?
27812776
# The function can return something of any type, so check
27822777
# if the type is compatible with the calling EA.

pandas/tests/extension/base/casting.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@ def test_tolist(self, data):
3333

3434
def test_astype_str(self, data):
3535
result = pd.Series(data[:5]).astype(str)
36-
expected = pd.Series(data[:5].astype(str))
36+
expected = pd.Series([str(x) for x in data[:5]], dtype=str)
3737
self.assert_series_equal(result, expected)
3838

3939
def test_astype_string(self, data):
40+
# GH-33465
4041
result = pd.Series(data[:5]).astype("string")
41-
expected = pd.Series(data[:5].astype("string"))
42+
expected = pd.Series([str(x) for x in data[:5]], dtype="string")
4243
self.assert_series_equal(result, expected)
4344

4445
def test_to_numpy(self, data):

pandas/tests/extension/base/methods.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -188,16 +188,15 @@ def test_combine_le(self, data_repeated):
188188
orig_data1, orig_data2 = data_repeated(2)
189189
s1 = pd.Series(orig_data1)
190190
s2 = pd.Series(orig_data2)
191-
result = s1.combine(s2, lambda x1, x2: x1 <= x2, dtype="boolean")
191+
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
192192
expected = pd.Series(
193-
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
194-
dtype="boolean",
193+
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]
195194
)
196195
self.assert_series_equal(result, expected)
197196

198197
val = s1.iloc[0]
199-
result = s1.combine(val, lambda x1, x2: x1 <= x2, dtype="boolean")
200-
expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean")
198+
result = s1.combine(val, lambda x1, x2: x1 <= x2)
199+
expected = pd.Series([a <= val for a in list(orig_data1)])
201200
self.assert_series_equal(result, expected)
202201

203202
def test_combine_add(self, data_repeated):

pandas/tests/extension/test_sparse.py

+9
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,15 @@ def test_astype_object_frame(self, all_data):
343343
# comp = result.dtypes.equals(df.dtypes)
344344
# assert not comp.any()
345345

346+
@pytest.mark.xfail(raises=AssertionError, reason="no sparse str dtype")
347+
def test_astype_str(self, data):
348+
# Sparse arrays don't support str dtype
349+
super().test_astype_str(data)
350+
351+
@pytest.mark.xfail(raises=AssertionError, reason="no sparse StringDtype")
352+
def test_astype_string(self, data):
353+
super().test_astype_string(data)
354+
346355

347356
class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests):
348357
series_scalar_exc = None

0 commit comments

Comments
 (0)