Skip to content

Commit b036ddf

Browse files
mroeschke authored and phofl committed
DEPR: Enforce empty Series returning object dtype (pandas-dev#49342)
* DEPR: Enforce empty Series returning object dtype
* Fix some tests & simplify
* only for list like types
* len(data)
1 parent f3550de commit b036ddf

File tree

17 files changed

+45
-148
lines changed

17 files changed

+45
-148
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ Removal of prior version deprecations/changes
279279
- Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
280280
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
281281
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
282+
- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`)
282283
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
283284
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
284285
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)

pandas/core/apply.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,7 @@
5858
from pandas.core.algorithms import safe_sort
5959
from pandas.core.base import SelectionMixin
6060
import pandas.core.common as com
61-
from pandas.core.construction import (
62-
create_series_with_explicit_dtype,
63-
ensure_wrapped_if_datetimelike,
64-
)
61+
from pandas.core.construction import ensure_wrapped_if_datetimelike
6562

6663
if TYPE_CHECKING:
6764
from pandas import (
@@ -881,14 +878,12 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series
881878

882879
# dict of scalars
883880

884-
# the default dtype of an empty Series will be `object`, but this
881+
# the default dtype of an empty Series is `object`, but this
885882
# code can be hit by df.mean() where the result should have dtype
886883
# float64 even if it's an empty Series.
887884
constructor_sliced = self.obj._constructor_sliced
888-
if constructor_sliced is Series:
889-
result = create_series_with_explicit_dtype(
890-
results, dtype_if_empty=np.float64
891-
)
885+
if len(results) == 0 and constructor_sliced is Series:
886+
result = constructor_sliced(results, dtype=np.float64)
892887
else:
893888
result = constructor_sliced(results)
894889
result.index = res_index

pandas/core/base.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@
7171
from pandas.core.arraylike import OpsMixin
7272
from pandas.core.arrays import ExtensionArray
7373
from pandas.core.construction import (
74-
create_series_with_explicit_dtype,
7574
ensure_wrapped_if_datetimelike,
7675
extract_array,
7776
)
@@ -842,9 +841,12 @@ def _map_values(self, mapper, na_action=None):
842841
# expected to be pd.Series(np.nan, ...). As np.nan is
843842
# of dtype float64 the return value of this method should
844843
# be float64 as well
845-
mapper = create_series_with_explicit_dtype(
846-
mapper, dtype_if_empty=np.float64
847-
)
844+
from pandas import Series
845+
846+
if len(mapper) == 0:
847+
mapper = Series(mapper, dtype=np.float64)
848+
else:
849+
mapper = Series(mapper)
848850

849851
if isinstance(mapper, ABCSeries):
850852
if na_action not in (None, "ignore"):

pandas/core/construction.py

-60
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from typing import (
1010
TYPE_CHECKING,
11-
Any,
1211
Optional,
1312
Sequence,
1413
Union,
@@ -830,62 +829,3 @@ def _try_cast(
830829
subarr = np.array(arr, dtype=dtype, copy=copy)
831830

832831
return subarr
833-
834-
835-
def is_empty_data(data: Any) -> bool:
836-
"""
837-
Utility to check if a Series is instantiated with empty data,
838-
which does not contain dtype information.
839-
840-
Parameters
841-
----------
842-
data : array-like, Iterable, dict, or scalar value
843-
Contains data stored in Series.
844-
845-
Returns
846-
-------
847-
bool
848-
"""
849-
is_none = data is None
850-
is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype")
851-
is_simple_empty = is_list_like_without_dtype and not data
852-
return is_none or is_simple_empty
853-
854-
855-
def create_series_with_explicit_dtype(
856-
data: Any = None,
857-
index: ArrayLike | Index | None = None,
858-
dtype: Dtype | None = None,
859-
name: str | None = None,
860-
copy: bool = False,
861-
fastpath: bool = False,
862-
dtype_if_empty: Dtype = object,
863-
) -> Series:
864-
"""
865-
Helper to pass an explicit dtype when instantiating an empty Series.
866-
867-
This silences a DeprecationWarning described in GitHub-17261.
868-
869-
Parameters
870-
----------
871-
data : Mirrored from Series.__init__
872-
index : Mirrored from Series.__init__
873-
dtype : Mirrored from Series.__init__
874-
name : Mirrored from Series.__init__
875-
copy : Mirrored from Series.__init__
876-
fastpath : Mirrored from Series.__init__
877-
dtype_if_empty : str, numpy.dtype, or ExtensionDtype
878-
This dtype will be passed explicitly if an empty Series will
879-
be instantiated.
880-
881-
Returns
882-
-------
883-
Series
884-
"""
885-
from pandas.core.series import Series
886-
887-
if is_empty_data(data) and dtype is None:
888-
dtype = dtype_if_empty
889-
return Series(
890-
data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
891-
)

pandas/core/generic.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,7 @@
145145
from pandas.core.array_algos.replace import should_use_regex
146146
from pandas.core.arrays import ExtensionArray
147147
from pandas.core.base import PandasObject
148-
from pandas.core.construction import (
149-
create_series_with_explicit_dtype,
150-
extract_array,
151-
)
148+
from pandas.core.construction import extract_array
152149
from pandas.core.describe import describe_ndframe
153150
from pandas.core.flags import Flags
154151
from pandas.core.indexes.api import (
@@ -6843,9 +6840,9 @@ def fillna(
68436840
if inplace:
68446841
return None
68456842
return self.copy()
6846-
value = create_series_with_explicit_dtype(
6847-
value, dtype_if_empty=object
6848-
)
6843+
from pandas import Series
6844+
6845+
value = Series(value)
68496846
value = value.reindex(self.index, copy=False)
68506847
value = value._values
68516848
elif not is_list_like(value):

pandas/core/groupby/generic.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@
8080
)
8181
from pandas.core.arrays.categorical import Categorical
8282
import pandas.core.common as com
83-
from pandas.core.construction import create_series_with_explicit_dtype
8483
from pandas.core.frame import DataFrame
8584
from pandas.core.groupby import base
8685
from pandas.core.groupby.groupby import (
@@ -295,9 +294,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
295294

296295
# result is a dict whose keys are the elements of result_index
297296
index = self.grouper.result_index
298-
return create_series_with_explicit_dtype(
299-
result, index=index, dtype_if_empty=object
300-
)
297+
return Series(result, index=index)
301298

302299
agg = aggregate
303300

@@ -1294,10 +1291,8 @@ def _wrap_applied_output_series(
12941291
key_index,
12951292
override_group_keys: bool,
12961293
) -> DataFrame | Series:
1297-
# this is to silence a DeprecationWarning
1298-
# TODO(2.0): Remove when default dtype of empty Series is object
12991294
kwargs = first_not_none._construct_axes_dict()
1300-
backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
1295+
backup = Series(**kwargs)
13011296
values = [x if (x is not None) else backup for x in values]
13021297

13031298
all_indexed_same = all_indexes_same(x.index for x in values)

pandas/core/internals/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
601601
else:
602602
if isinstance(val, dict):
603603
# GH#41785 this _should_ be equivalent to (but faster than)
604-
# val = create_series_with_explicit_dtype(val, index=index)._values
604+
# val = Series(val, index=index)._values
605605
if oindex is None:
606606
oindex = index.astype("O")
607607

pandas/core/series.py

+5-21
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,7 @@
119119
from pandas.core.arrays.categorical import CategoricalAccessor
120120
from pandas.core.arrays.sparse import SparseAccessor
121121
from pandas.core.construction import (
122-
create_series_with_explicit_dtype,
123122
extract_array,
124-
is_empty_data,
125123
sanitize_array,
126124
)
127125
from pandas.core.generic import NDFrame
@@ -389,18 +387,6 @@ def __init__(
389387

390388
name = ibase.maybe_extract_name(name, data, type(self))
391389

392-
if is_empty_data(data) and dtype is None:
393-
# gh-17261
394-
warnings.warn(
395-
"The default dtype for empty Series will be 'object' instead "
396-
"of 'float64' in a future version. Specify a dtype explicitly "
397-
"to silence this warning.",
398-
FutureWarning,
399-
stacklevel=find_stack_level(),
400-
)
401-
# uncomment the line below when removing the FutureWarning
402-
# dtype = np.dtype(object)
403-
404390
if index is not None:
405391
index = ensure_index(index)
406392

@@ -458,6 +444,9 @@ def __init__(
458444
pass
459445
else:
460446
data = com.maybe_iterable_to_list(data)
447+
if is_list_like(data) and not len(data) and dtype is None:
448+
# GH 29405: Pre-2.0, this defaulted to float.
449+
dtype = np.dtype(object)
461450

462451
if index is None:
463452
if not is_list_like(data):
@@ -531,15 +520,10 @@ def _init_dict(
531520

532521
# Input is now list-like, so rely on "standard" construction:
533522

534-
# TODO: passing np.float64 to not break anything yet. See GH-17261
535-
s = create_series_with_explicit_dtype(
536-
# error: Argument "index" to "create_series_with_explicit_dtype" has
537-
# incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
538-
# ndarray, Index, None]"
523+
s = self._constructor(
539524
values,
540-
index=keys, # type: ignore[arg-type]
525+
index=keys,
541526
dtype=dtype,
542-
dtype_if_empty=np.float64,
543527
)
544528

545529
# Now we just make sure the order is respected, if any

pandas/io/html.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@
3232
from pandas.core.dtypes.common import is_list_like
3333

3434
from pandas import isna
35-
from pandas.core.construction import create_series_with_explicit_dtype
3635
from pandas.core.indexes.base import Index
3736
from pandas.core.indexes.multi import MultiIndex
37+
from pandas.core.series import Series
3838

3939
from pandas.io.common import (
4040
file_exists,
@@ -858,7 +858,7 @@ def _parse_tfoot_tr(self, table):
858858

859859
def _expand_elements(body) -> None:
860860
data = [len(elem) for elem in body]
861-
lens = create_series_with_explicit_dtype(data, dtype_if_empty=object)
861+
lens = Series(data)
862862
lens_max = lens.max()
863863
not_max = lens[lens != lens_max]
864864

pandas/io/json/_json.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
notna,
5050
to_datetime,
5151
)
52-
from pandas.core.construction import create_series_with_explicit_dtype
5352
from pandas.core.reshape.concat import concat
5453
from pandas.core.shared_docs import _shared_docs
5554

@@ -1221,9 +1220,9 @@ def _parse(self) -> None:
12211220
if self.orient == "split":
12221221
decoded = {str(k): v for k, v in data.items()}
12231222
self.check_keys_split(decoded)
1224-
self.obj = create_series_with_explicit_dtype(**decoded)
1223+
self.obj = Series(**decoded)
12251224
else:
1226-
self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)
1225+
self.obj = Series(data)
12271226

12281227
def _try_convert_types(self) -> None:
12291228
if self.obj is None:

pandas/tests/frame/constructors/test_from_dict.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
Series,
1111
)
1212
import pandas._testing as tm
13-
from pandas.core.construction import create_series_with_explicit_dtype
1413

1514

1615
class TestFromDict:
@@ -79,9 +78,7 @@ def test_constructor_list_of_series(self):
7978
OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
8079
OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
8180
]
82-
data = [
83-
create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data
84-
]
81+
data = [Series(d) for d in data]
8582

8683
result = DataFrame(data)
8784
sdict = OrderedDict(zip(range(len(data)), data))

pandas/tests/reshape/concat/test_concat.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
)
3131
import pandas._testing as tm
3232
from pandas.core.arrays import SparseArray
33-
from pandas.core.construction import create_series_with_explicit_dtype
3433
from pandas.tests.extension.decimal import to_decimal
3534

3635

@@ -519,7 +518,7 @@ def test_concat_no_unnecessary_upcast(dt, frame_or_series):
519518
assert x.values.dtype == dt
520519

521520

522-
@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame])
521+
@pytest.mark.parametrize("pdt", [Series, DataFrame])
523522
@pytest.mark.parametrize("dt", np.sctypes["int"])
524523
def test_concat_will_upcast(dt, pdt):
525524
with catch_warnings(record=True):

pandas/tests/series/methods/test_astype.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,7 @@ def test_astype_empty_constructor_equality(self, dtype):
108108
"m", # Generic timestamps raise a ValueError. Already tested.
109109
):
110110
init_empty = Series([], dtype=dtype)
111-
with tm.assert_produces_warning(FutureWarning):
112-
as_type_empty = Series([]).astype(dtype)
111+
as_type_empty = Series([]).astype(dtype)
113112
tm.assert_series_equal(init_empty, as_type_empty)
114113

115114
@pytest.mark.parametrize("dtype", [str, np.str_])

pandas/tests/series/methods/test_is_unique.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import pytest
33

44
from pandas import Series
5-
from pandas.core.construction import create_series_with_explicit_dtype
65

76

87
@pytest.mark.parametrize(
@@ -19,7 +18,7 @@
1918
)
2019
def test_is_unique(data, expected):
2120
# GH#11946 / GH#25180
22-
ser = create_series_with_explicit_dtype(data, dtype_if_empty=object)
21+
ser = Series(data)
2322
assert ser.is_unique is expected
2423

2524

pandas/tests/series/methods/test_replace.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,7 @@ def test_replace_with_empty_dictlike(self):
309309
s = pd.Series(list("abcd"))
310310
tm.assert_series_equal(s, s.replace({}))
311311

312-
with tm.assert_produces_warning(FutureWarning):
313-
empty_series = pd.Series([])
312+
empty_series = pd.Series([])
314313
tm.assert_series_equal(s, s.replace(empty_series))
315314

316315
def test_replace_string_with_number(self):

0 commit comments

Comments
 (0)