Skip to content

DEPR: Enforce empty Series returning object dtype #49342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ Removal of prior version deprecations/changes
- Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`)
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
Expand Down
13 changes: 4 additions & 9 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,7 @@
from pandas.core.algorithms import safe_sort
from pandas.core.base import SelectionMixin
import pandas.core.common as com
from pandas.core.construction import (
create_series_with_explicit_dtype,
ensure_wrapped_if_datetimelike,
)
from pandas.core.construction import ensure_wrapped_if_datetimelike

if TYPE_CHECKING:
from pandas import (
Expand Down Expand Up @@ -906,14 +903,12 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series

# dict of scalars

# the default dtype of an empty Series will be `object`, but this
# the default dtype of an empty Series is `object`, but this
# code can be hit by df.mean() where the result should have dtype
# float64 even if it's an empty Series.
constructor_sliced = self.obj._constructor_sliced
if constructor_sliced is Series:
result = create_series_with_explicit_dtype(
results, dtype_if_empty=np.float64
)
if len(results) == 0 and constructor_sliced is Series:
result = constructor_sliced(results, dtype=np.float64)
else:
result = constructor_sliced(results)
result.index = res_index
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.construction import (
create_series_with_explicit_dtype,
ensure_wrapped_if_datetimelike,
extract_array,
)
Expand Down Expand Up @@ -842,9 +841,12 @@ def _map_values(self, mapper, na_action=None):
# expected to be pd.Series(np.nan, ...). As np.nan is
# of dtype float64 the return value of this method should
# be float64 as well
mapper = create_series_with_explicit_dtype(
mapper, dtype_if_empty=np.float64
)
from pandas import Series

if len(mapper) == 0:
mapper = Series(mapper, dtype=np.float64)
else:
mapper = Series(mapper)

if isinstance(mapper, ABCSeries):
if na_action not in (None, "ignore"):
Expand Down
60 changes: 0 additions & 60 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from typing import (
TYPE_CHECKING,
Any,
Optional,
Sequence,
Union,
Expand Down Expand Up @@ -830,62 +829,3 @@ def _try_cast(
subarr = np.array(arr, dtype=dtype, copy=copy)

return subarr


def is_empty_data(data: Any) -> bool:
    """
    Report whether ``data`` is an empty input that carries no dtype information.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Candidate data for a Series.

    Returns
    -------
    bool
        True when ``data`` is None, or when it is list-like, exposes no
        ``dtype`` attribute and is falsy. False in every other case.
    """
    if data is None:
        return True
    # Scalars, strings and anything exposing ``dtype`` (e.g. ndarrays) are
    # never reported as empty, whatever their length.
    if not is_list_like(data) or hasattr(data, "dtype"):
        return False
    # What remains is a dtype-less list-like: empty exactly when it is falsy.
    return not data


def create_series_with_explicit_dtype(
    data: Any = None,
    index: ArrayLike | Index | None = None,
    dtype: Dtype | None = None,
    name: str | None = None,
    copy: bool = False,
    fastpath: bool = False,
    dtype_if_empty: Dtype = object,
) -> Series:
    """
    Build a Series, supplying an explicit dtype when the data is empty.

    Passing the dtype explicitly avoids the empty-Series default-dtype
    warning described in GitHub-17261.

    Parameters
    ----------
    data : Mirrored from Series.__init__
    index : Mirrored from Series.__init__
    dtype : Mirrored from Series.__init__
    name : Mirrored from Series.__init__
    copy : Mirrored from Series.__init__
    fastpath : Mirrored from Series.__init__
    dtype_if_empty : str, numpy.dtype, or ExtensionDtype
        Dtype used in place of ``dtype`` when ``data`` is empty and no
        ``dtype`` was given.

    Returns
    -------
    Series
    """
    # Imported at call time rather than at module import time.
    from pandas.core.series import Series

    # A caller-provided dtype always wins; the fallback applies only to
    # empty, dtype-less data.
    resolved_dtype = (
        dtype_if_empty if (is_empty_data(data) and dtype is None) else dtype
    )
    series_kwargs = {
        "data": data,
        "index": index,
        "dtype": resolved_dtype,
        "name": name,
        "copy": copy,
        "fastpath": fastpath,
    }
    return Series(**series_kwargs)
11 changes: 4 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,7 @@
from pandas.core.array_algos.replace import should_use_regex
from pandas.core.arrays import ExtensionArray
from pandas.core.base import PandasObject
from pandas.core.construction import (
create_series_with_explicit_dtype,
extract_array,
)
from pandas.core.construction import extract_array
from pandas.core.describe import describe_ndframe
from pandas.core.flags import Flags
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -6843,9 +6840,9 @@ def fillna(
if inplace:
return None
return self.copy()
value = create_series_with_explicit_dtype(
value, dtype_if_empty=object
)
from pandas import Series

value = Series(value)
value = value.reindex(self.index, copy=False)
value = value._values
elif not is_list_like(value):
Expand Down
9 changes: 2 additions & 7 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
)
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.frame import DataFrame
from pandas.core.groupby import base
from pandas.core.groupby.groupby import (
Expand Down Expand Up @@ -295,9 +294,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)

# result is a dict whose keys are the elements of result_index
index = self.grouper.result_index
return create_series_with_explicit_dtype(
result, index=index, dtype_if_empty=object
)
return Series(result, index=index)

agg = aggregate

Expand Down Expand Up @@ -1294,10 +1291,8 @@ def _wrap_applied_output_series(
key_index,
override_group_keys: bool,
) -> DataFrame | Series:
# this is to silence a DeprecationWarning
# TODO(2.0): Remove when default dtype of empty Series is object
kwargs = first_not_none._construct_axes_dict()
backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
backup = Series(**kwargs)
values = [x if (x is not None) else backup for x in values]

all_indexed_same = all_indexes_same(x.index for x in values)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
else:
if isinstance(val, dict):
# GH#41785 this _should_ be equivalent to (but faster than)
# val = create_series_with_explicit_dtype(val, index=index)._values
# val = Series(val, index=index)._values
if oindex is None:
oindex = index.astype("O")

Expand Down
26 changes: 5 additions & 21 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,7 @@
from pandas.core.arrays.categorical import CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
from pandas.core.construction import (
create_series_with_explicit_dtype,
extract_array,
is_empty_data,
sanitize_array,
)
from pandas.core.generic import NDFrame
Expand Down Expand Up @@ -389,18 +387,6 @@ def __init__(

name = ibase.maybe_extract_name(name, data, type(self))

if is_empty_data(data) and dtype is None:
# gh-17261
warnings.warn(
"The default dtype for empty Series will be 'object' instead "
"of 'float64' in a future version. Specify a dtype explicitly "
"to silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
# uncomment the line below when removing the FutureWarning
# dtype = np.dtype(object)

if index is not None:
index = ensure_index(index)

Expand Down Expand Up @@ -458,6 +444,9 @@ def __init__(
pass
else:
data = com.maybe_iterable_to_list(data)
if is_list_like(data) and not len(data) and dtype is None:
# GH 29405: Pre-2.0, this defaulted to float.
dtype = np.dtype(object)

if index is None:
if not is_list_like(data):
Expand Down Expand Up @@ -531,15 +520,10 @@ def _init_dict(

# Input is now list-like, so rely on "standard" construction:

# TODO: passing np.float64 to not break anything yet. See GH-17261
s = create_series_with_explicit_dtype(
# error: Argument "index" to "create_series_with_explicit_dtype" has
# incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
# ndarray, Index, None]"
s = self._constructor(
values,
index=keys, # type: ignore[arg-type]
index=keys,
dtype=dtype,
dtype_if_empty=np.float64,
)

# Now we just make sure the order is respected, if any
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
from pandas.core.dtypes.common import is_list_like

from pandas import isna
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.indexes.base import Index
from pandas.core.indexes.multi import MultiIndex
from pandas.core.series import Series

from pandas.io.common import (
file_exists,
Expand Down Expand Up @@ -858,7 +858,7 @@ def _parse_tfoot_tr(self, table):

def _expand_elements(body) -> None:
data = [len(elem) for elem in body]
lens = create_series_with_explicit_dtype(data, dtype_if_empty=object)
lens = Series(data)
lens_max = lens.max()
not_max = lens[lens != lens_max]

Expand Down
5 changes: 2 additions & 3 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
notna,
to_datetime,
)
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs

Expand Down Expand Up @@ -1221,9 +1220,9 @@ def _parse(self) -> None:
if self.orient == "split":
decoded = {str(k): v for k, v in data.items()}
self.check_keys_split(decoded)
self.obj = create_series_with_explicit_dtype(**decoded)
self.obj = Series(**decoded)
else:
self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)
self.obj = Series(data)

def _try_convert_types(self) -> None:
if self.obj is None:
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
Series,
)
import pandas._testing as tm
from pandas.core.construction import create_series_with_explicit_dtype


class TestFromDict:
Expand Down Expand Up @@ -79,9 +78,7 @@ def test_constructor_list_of_series(self):
OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
]
data = [
create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data
]
data = [Series(d) for d in data]

result = DataFrame(data)
sdict = OrderedDict(zip(range(len(data)), data))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
)
import pandas._testing as tm
from pandas.core.arrays import SparseArray
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.tests.extension.decimal import to_decimal


Expand Down Expand Up @@ -519,7 +518,7 @@ def test_concat_no_unnecessary_upcast(dt, frame_or_series):
assert x.values.dtype == dt


@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame])
@pytest.mark.parametrize("pdt", [Series, DataFrame])
@pytest.mark.parametrize("dt", np.sctypes["int"])
def test_concat_will_upcast(dt, pdt):
with catch_warnings(record=True):
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ def test_astype_empty_constructor_equality(self, dtype):
"m", # Generic timestamps raise a ValueError. Already tested.
):
init_empty = Series([], dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
as_type_empty = Series([]).astype(dtype)
as_type_empty = Series([]).astype(dtype)
tm.assert_series_equal(init_empty, as_type_empty)

@pytest.mark.parametrize("dtype", [str, np.str_])
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/methods/test_is_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest

from pandas import Series
from pandas.core.construction import create_series_with_explicit_dtype


@pytest.mark.parametrize(
Expand All @@ -19,7 +18,7 @@
)
def test_is_unique(data, expected):
# GH#11946 / GH#25180
ser = create_series_with_explicit_dtype(data, dtype_if_empty=object)
ser = Series(data)
assert ser.is_unique is expected


Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,7 @@ def test_replace_with_empty_dictlike(self):
s = pd.Series(list("abcd"))
tm.assert_series_equal(s, s.replace({}))

with tm.assert_produces_warning(FutureWarning):
empty_series = pd.Series([])
empty_series = pd.Series([])
tm.assert_series_equal(s, s.replace(empty_series))

def test_replace_string_with_number(self):
Expand Down
Loading