diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5aa753dffcf7f..021d7b095b7dc 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -264,6 +264,7 @@ Removal of prior version deprecations/changes - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`) +- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`) - Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`) - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4f9af2d0c01d6..5c35b509ba00a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -58,10 +58,7 @@ from pandas.core.algorithms import safe_sort from pandas.core.base import SelectionMixin import pandas.core.common as com -from pandas.core.construction import ( - create_series_with_explicit_dtype, - ensure_wrapped_if_datetimelike, -) +from pandas.core.construction import ensure_wrapped_if_datetimelike if TYPE_CHECKING: from pandas import ( @@ -906,14 +903,12 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series # dict of scalars - # the default dtype of an empty Series will be `object`, but this + # the default dtype of an empty Series is `object`, but this # code can be hit by df.mean() where the result should have dtype # float64 even if it's an empty Series. constructor_sliced = self.obj._constructor_sliced - if constructor_sliced is Series: - result = create_series_with_explicit_dtype( - results, dtype_if_empty=np.float64 - ) + if len(results) == 0 and constructor_sliced is Series: + result = constructor_sliced(results, dtype=np.float64) else: result = constructor_sliced(results) result.index = res_index diff --git a/pandas/core/base.py b/pandas/core/base.py index 5e0694ea91360..e88ad801062c9 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -71,7 +71,6 @@ from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ExtensionArray from pandas.core.construction import ( - create_series_with_explicit_dtype, ensure_wrapped_if_datetimelike, extract_array, ) @@ -842,9 +841,12 @@ def _map_values(self, mapper, na_action=None): # expected to be pd.Series(np.nan, ...). As np.nan is # of dtype float64 the return value of this method should # be float64 as well - mapper = create_series_with_explicit_dtype( - mapper, dtype_if_empty=np.float64 - ) + from pandas import Series + + if len(mapper) == 0: + mapper = Series(mapper, dtype=np.float64) + else: + mapper = Series(mapper) if isinstance(mapper, ABCSeries): if na_action not in (None, "ignore"): diff --git a/pandas/core/construction.py b/pandas/core/construction.py index c381496164630..259fd81911782 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -8,7 +8,6 @@ from typing import ( TYPE_CHECKING, - Any, Optional, Sequence, Union, @@ -830,62 +829,3 @@ def _try_cast( subarr = np.array(arr, dtype=dtype, copy=copy) return subarr - - -def is_empty_data(data: Any) -> bool: - """ - Utility to check if a Series is instantiated with empty data, - which does not contain dtype information. - - Parameters - ---------- - data : array-like, Iterable, dict, or scalar value - Contains data stored in Series. - - Returns - ------- - bool - """ - is_none = data is None - is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype") - is_simple_empty = is_list_like_without_dtype and not data - return is_none or is_simple_empty - - -def create_series_with_explicit_dtype( - data: Any = None, - index: ArrayLike | Index | None = None, - dtype: Dtype | None = None, - name: str | None = None, - copy: bool = False, - fastpath: bool = False, - dtype_if_empty: Dtype = object, -) -> Series: - """ - Helper to pass an explicit dtype when instantiating an empty Series. - - This silences a DeprecationWarning described in GitHub-17261. - - Parameters - ---------- - data : Mirrored from Series.__init__ - index : Mirrored from Series.__init__ - dtype : Mirrored from Series.__init__ - name : Mirrored from Series.__init__ - copy : Mirrored from Series.__init__ - fastpath : Mirrored from Series.__init__ - dtype_if_empty : str, numpy.dtype, or ExtensionDtype - This dtype will be passed explicitly if an empty Series will - be instantiated. - - Returns - ------- - Series - """ - from pandas.core.series import Series - - if is_empty_data(data) and dtype is None: - dtype = dtype_if_empty - return Series( - data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath - ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 05494e37256df..cf492f0716ef5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -145,10 +145,7 @@ from pandas.core.array_algos.replace import should_use_regex from pandas.core.arrays import ExtensionArray from pandas.core.base import PandasObject -from pandas.core.construction import ( - create_series_with_explicit_dtype, - extract_array, -) +from pandas.core.construction import extract_array from pandas.core.describe import describe_ndframe from pandas.core.flags import Flags from pandas.core.indexes.api import ( @@ -6843,9 +6840,9 @@ def fillna( if inplace: return None return self.copy() - value = create_series_with_explicit_dtype( - value, dtype_if_empty=object - ) + from pandas import Series + + value = Series(value) value = value.reindex(self.index, copy=False) value = value._values elif not is_list_like(value): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4c06ee60d3f6a..2ea88e4135d41 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -80,7 +80,6 @@ ) from pandas.core.arrays.categorical import Categorical import pandas.core.common as com -from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.frame import DataFrame from pandas.core.groupby import base from pandas.core.groupby.groupby import ( @@ -295,9 +294,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # result is a dict whose keys are the elements of result_index index = self.grouper.result_index - return create_series_with_explicit_dtype( - result, index=index, dtype_if_empty=object - ) + return Series(result, index=index) agg = aggregate @@ -1294,10 +1291,8 @@ def _wrap_applied_output_series( key_index, override_group_keys: bool, ) -> DataFrame | Series: - # this is to silence a DeprecationWarning - # TODO(2.0): Remove when default dtype of empty Series is object kwargs = first_not_none._construct_axes_dict() - backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) + backup = Series(**kwargs) values = [x if (x is not None) else backup for x in values] all_indexed_same = all_indexes_same(x.index for x in values) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index ee7026663b2b6..0b42a4a3cd0c1 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -601,7 +601,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: else: if isinstance(val, dict): # GH#41785 this _should_ be equivalent to (but faster than) - # val = create_series_with_explicit_dtype(val, index=index)._values + # val = Series(val, index=index)._values if oindex is None: oindex = index.astype("O") diff --git a/pandas/core/series.py b/pandas/core/series.py index 9bfb2a0561532..9fe5cd0e4da9a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -119,9 +119,7 @@ from pandas.core.arrays.categorical import CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor from pandas.core.construction import ( - create_series_with_explicit_dtype, extract_array, - is_empty_data, sanitize_array, ) from pandas.core.generic import NDFrame @@ -389,18 +387,6 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) - if is_empty_data(data) and dtype is None: - # gh-17261 - warnings.warn( - "The default dtype for empty Series will be 'object' instead " - "of 'float64' in a future version. Specify a dtype explicitly " - "to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # uncomment the line below when removing the FutureWarning - # dtype = np.dtype(object) - if index is not None: index = ensure_index(index) @@ -458,6 +444,9 @@ def __init__( pass else: data = com.maybe_iterable_to_list(data) + if is_list_like(data) and not len(data) and dtype is None: + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) if index is None: if not is_list_like(data): @@ -531,15 +520,10 @@ def _init_dict( # Input is now list-like, so rely on "standard" construction: - # TODO: passing np.float64 to not break anything yet. See GH-17261 - s = create_series_with_explicit_dtype( - # error: Argument "index" to "create_series_with_explicit_dtype" has - # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray, - # ndarray, Index, None]" + s = self._constructor( values, - index=keys, # type: ignore[arg-type] + index=keys, dtype=dtype, - dtype_if_empty=np.float64, ) # Now we just make sure the order is respected, if any diff --git a/pandas/io/html.py b/pandas/io/html.py index a08b73d94250b..67c120a30280c 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -32,9 +32,9 @@ from pandas.core.dtypes.common import is_list_like from pandas import isna -from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.indexes.base import Index from pandas.core.indexes.multi import MultiIndex +from pandas.core.series import Series from pandas.io.common import ( file_exists, @@ -858,7 +858,7 @@ def _parse_tfoot_tr(self, table): def _expand_elements(body) -> None: data = [len(elem) for elem in body] - lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) + lens = Series(data) lens_max = lens.max() not_max = lens[lens != lens_max] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 15e7da1b8525e..43de9093587a7 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -49,7 +49,6 @@ notna, to_datetime, ) -from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.reshape.concat import concat from pandas.core.shared_docs import _shared_docs @@ -1221,9 +1220,9 @@ def _parse(self) -> None: if self.orient == "split": decoded = {str(k): v for k, v in data.items()} self.check_keys_split(decoded) - self.obj = create_series_with_explicit_dtype(**decoded) + self.obj = Series(**decoded) else: - self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + self.obj = Series(data) def _try_convert_types(self) -> None: if self.obj is None: diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 7c2b009673bb7..6cba95e42463d 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -10,7 +10,6 @@ Series, ) import pandas._testing as tm -from pandas.core.construction import create_series_with_explicit_dtype class TestFromDict: @@ -79,9 +78,7 @@ def test_constructor_list_of_series(self): OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), OrderedDict([["b", 3], ["c", 4], ["d", 6]]), ] - data = [ - create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data - ] + data = [Series(d) for d in data] result = DataFrame(data) sdict = OrderedDict(zip(range(len(data)), data)) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index d15199e84f7ac..ea526c95f20e0 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -30,7 +30,6 @@ ) import pandas._testing as tm from pandas.core.arrays import SparseArray -from pandas.core.construction import create_series_with_explicit_dtype from pandas.tests.extension.decimal import to_decimal @@ -519,7 +518,7 @@ def test_concat_no_unnecessary_upcast(dt, frame_or_series): assert x.values.dtype == dt -@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame]) +@pytest.mark.parametrize("pdt", [Series, DataFrame]) @pytest.mark.parametrize("dt", np.sctypes["int"]) def test_concat_will_upcast(dt, pdt): with catch_warnings(record=True): diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9b57f0f634a6c..462de7e28d42b 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -106,8 +106,7 @@ def test_astype_empty_constructor_equality(self, dtype): "m", # Generic timestamps raise a ValueError. Already tested. ): init_empty = Series([], dtype=dtype) - with tm.assert_produces_warning(FutureWarning): - as_type_empty = Series([]).astype(dtype) + as_type_empty = Series([]).astype(dtype) tm.assert_series_equal(init_empty, as_type_empty) @pytest.mark.parametrize("dtype", [str, np.str_]) diff --git a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py index 960057cb3d646..db77f77467b42 100644 --- a/pandas/tests/series/methods/test_is_unique.py +++ b/pandas/tests/series/methods/test_is_unique.py @@ -2,7 +2,6 @@ import pytest from pandas import Series -from pandas.core.construction import create_series_with_explicit_dtype @pytest.mark.parametrize( @@ -19,7 +18,7 @@ ) def test_is_unique(data, expected): # GH#11946 / GH#25180 - ser = create_series_with_explicit_dtype(data, dtype_if_empty=object) + ser = Series(data) assert ser.is_unique is expected diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 77c9cf4013bd7..5850cd2907675 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -309,8 +309,7 @@ def test_replace_with_empty_dictlike(self): s = pd.Series(list("abcd")) tm.assert_series_equal(s, s.replace({})) - with tm.assert_produces_warning(FutureWarning): - empty_series = pd.Series([]) + empty_series = pd.Series([]) tm.assert_series_equal(s, s.replace(empty_series)) def test_replace_string_with_number(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 9817c758759d5..e75d5d8dd9638 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -74,9 +74,8 @@ class TestSeriesConstructors: ) def test_empty_constructor(self, constructor, check_index_type): # TODO: share with frame test of the same name - with tm.assert_produces_warning(FutureWarning): - expected = Series() - result = constructor() + expected = Series() + result = constructor() assert len(result.index) == 0 tm.assert_series_equal(result, expected, check_index_type=check_index_type) @@ -119,8 +118,7 @@ def test_scalar_extension_dtype(self, ea_scalar_and_dtype): tm.assert_series_equal(ser, expected) def test_constructor(self, datetime_series): - with tm.assert_produces_warning(FutureWarning): - empty_series = Series() + empty_series = Series() assert datetime_series.index._is_all_dates # Pass in Series @@ -137,8 +135,7 @@ def test_constructor(self, datetime_series): assert mixed[1] is np.NaN assert not empty_series.index._is_all_dates - with tm.assert_produces_warning(FutureWarning): - assert not Series().index._is_all_dates + assert not Series().index._is_all_dates # exception raised is of type ValueError GH35744 with pytest.raises(ValueError, match="Data must be 1-dimensional"): @@ -163,9 +160,8 @@ def test_constructor_index_ndim_gt_1_raises(self): @pytest.mark.parametrize("input_class", [list, dict, OrderedDict]) def test_constructor_empty(self, input_class): - with tm.assert_produces_warning(FutureWarning): - empty = Series() - empty2 = Series(input_class()) + empty = Series() + empty2 = Series(input_class()) # these are Index() and RangeIndex() which don't compare type equal # but are just .equals @@ -183,9 +179,8 @@ def test_constructor_empty(self, input_class): if input_class is not list: # With index: - with tm.assert_produces_warning(FutureWarning): - empty = Series(index=range(10)) - empty2 = Series(input_class(), index=range(10)) + empty = Series(index=range(10)) + empty2 = Series(input_class(), index=range(10)) tm.assert_series_equal(empty, empty2) # With index and dtype float64: @@ -217,8 +212,7 @@ def test_constructor_dtype_only(self, dtype, index): assert len(result) == 0 def test_constructor_no_data_index_order(self): - with tm.assert_produces_warning(FutureWarning): - result = Series(index=["b", "a", "c"]) + result = Series(index=["b", "a", "c"]) assert result.index.tolist() == ["b", "a", "c"] def test_constructor_no_data_string_type(self): @@ -696,8 +690,7 @@ def test_constructor_limit_copies(self, index): assert s._mgr.blocks[0].values is not index def test_constructor_pass_none(self): - with tm.assert_produces_warning(FutureWarning): - s = Series(None, index=range(5)) + s = Series(None, index=range(5)) assert s.dtype == np.float64 s = Series(None, index=range(5), dtype=object) @@ -705,9 +698,8 @@ def test_constructor_pass_none(self): # GH 7431 # inference on the index - with tm.assert_produces_warning(FutureWarning): - s = Series(index=np.array([None])) - expected = Series(index=Index([None])) + s = Series(index=np.array([None])) + expected = Series(index=Index([None])) tm.assert_series_equal(s, expected) def test_constructor_pass_nan_nat(self): diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index fd6f4e0083b08..a5620de7de65b 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -35,8 +35,7 @@ def test_subclass_unstack(self): tm.assert_frame_equal(res, exp) def test_subclass_empty_repr(self): - with tm.assert_produces_warning(FutureWarning): - sub_series = tm.SubclassedSeries() + sub_series = tm.SubclassedSeries() assert "SubclassedSeries" in repr(sub_series) def test_asof(self):