diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 11957cfa265f5..1cc485a229123 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -190,7 +190,7 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0. pd.Series([np.nan]).sum() - pd.Series([]).sum() + pd.Series([], dtype="float64").sum() The product of an empty or all-NA Series or column of a DataFrame is 1. @@ -198,7 +198,7 @@ The product of an empty or all-NA Series or column of a DataFrame is 1. pd.Series([np.nan]).prod() - pd.Series([]).prod() + pd.Series([], dtype="float64").prod() NA values in GroupBy diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index ba213864ec469..0611c6334937f 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -358,6 +358,7 @@ results will fit in memory, so we can safely call ``compute`` without running out of memory. At that point it's just a regular pandas object. .. ipython:: python + :okwarning: @savefig dask_resample.png ddf[['x', 'y']].resample("1D").mean().cumsum().compute().plot() diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 61a65415f6b57..6f6446c3f74e1 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -707,6 +707,7 @@ A ``Series`` will now correctly promote its dtype for assignment with incompat v .. ipython:: python + :okwarning: s = pd.Series() diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index a9c7937308204..f33943e423b25 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -428,6 +428,7 @@ Note that this also changes the sum of an empty ``Series``. Previously this alwa but for consistency with the all-NaN case, this was changed to return NaN as well: .. 
ipython:: python + :okwarning: pd.Series([]).sum() diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst index ea36b35d61740..75949a90d09a6 100644 --- a/doc/source/whatsnew/v0.22.0.rst +++ b/doc/source/whatsnew/v0.22.0.rst @@ -55,6 +55,7 @@ The default sum for empty or all-*NA* ``Series`` is now ``0``. *pandas 0.22.0* .. ipython:: python + :okwarning: pd.Series([]).sum() pd.Series([np.nan]).sum() @@ -67,6 +68,7 @@ pandas 0.20.3 without bottleneck, or pandas 0.21.x), use the ``min_count`` keyword. .. ipython:: python + :okwarning: pd.Series([]).sum(min_count=1) @@ -85,6 +87,7 @@ required for a non-NA sum or product. returning ``1`` instead. .. ipython:: python + :okwarning: pd.Series([]).prod() pd.Series([np.nan]).prod() diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4ce4c12483b36..771b3e484f67c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -366,6 +366,23 @@ When :class:`Categorical` contains ``np.nan``, pd.Categorical([1, 2, np.nan], ordered=True).min() + +Default dtype of empty :class:`pandas.Series` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Initialising an empty :class:`pandas.Series` without specifying a dtype will now emit a ``DeprecationWarning`` +(:issue:`17261`). The default dtype will change from ``float64`` to ``object`` in future releases so that it is +consistent with the behaviour of :class:`DataFrame` and :class:`Index`. + +*pandas 1.0.0* + +.. code-block:: ipython + + In [1]: pd.Series() + Out[1]: + DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. + Series([], dtype: float64) + .. 
_whatsnew_1000.api_breaking.deps: Increased minimum versions for dependencies @@ -494,7 +511,7 @@ Removal of prior version deprecations/changes Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`). This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using -matplotlib directly rather than rather than :meth:`~DataFrame.plot`. +matplotlib directly rather than :meth:`~DataFrame.plot`. To use pandas formatters with a matplotlib plot, specify diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index aeec5e8a0400a..7dfed94482a05 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -64,7 +64,7 @@ def __new__(cls) -> "Series": # type: ignore stacklevel=6, ) - return Series() + return Series(dtype=object) class _LoadSparseFrame: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 8c49b2b803241..ef3d8cd53596b 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -15,6 +15,8 @@ ) from pandas.core.dtypes.generic import ABCMultiIndex, ABCSeries +from pandas.core.construction import create_series_with_explicit_dtype + if TYPE_CHECKING: from pandas import DataFrame, Series, Index @@ -203,7 +205,7 @@ def apply_empty_result(self): if not should_reduce: try: - r = self.f(Series([])) + r = self.f(Series([], dtype=np.float64)) except Exception: pass else: @@ -211,7 +213,7 @@ def apply_empty_result(self): if should_reduce: if len(self.agg_axis): - r = self.f(Series([])) + r = self.f(Series([], dtype=np.float64)) else: r = np.nan @@ -346,6 +348,7 @@ def apply_series_generator(self) -> Tuple[ResType, "Index"]: def wrap_results( self, results: ResType, res_index: "Index" ) -> Union["Series", "DataFrame"]: + from pandas import Series # see if we can infer the results if len(results) > 0 and 0 in results and is_sequence(results[0]): @@ -353,7 +356,17 @@ def wrap_results( return 
self.wrap_results_for_axis(results, res_index) # dict of scalars - result = self.obj._constructor_sliced(results) + + # the default dtype of an empty Series will be `object`, but this + # code can be hit by df.mean() where the result should have dtype + # float64 even if it's an empty Series. + constructor_sliced = self.obj._constructor_sliced + if constructor_sliced is Series: + result = create_series_with_explicit_dtype( + results, dtype_if_empty=np.float64 + ) + else: + result = constructor_sliced(results) result.index = res_index return result diff --git a/pandas/core/base.py b/pandas/core/base.py index 5e613849ba8d5..b7216d2a70ee6 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -34,6 +34,7 @@ from pandas.core.accessor import DirNamesMixin from pandas.core.algorithms import duplicated, unique1d, value_counts from pandas.core.arrays import ExtensionArray +from pandas.core.construction import create_series_with_explicit_dtype import pandas.core.nanops as nanops _shared_docs: Dict[str, str] = dict() @@ -1132,9 +1133,14 @@ def _map_values(self, mapper, na_action=None): # convert to an Series for efficiency. # we specify the keys here to handle the # possibility that they are tuples - from pandas import Series - mapper = Series(mapper) + # The return value of mapping with an empty mapper is + # expected to be pd.Series(np.nan, ...). As np.nan is + # of dtype float64 the return value of this method should + # be float64 as well + mapper = create_series_with_explicit_dtype( + mapper, dtype_if_empty=np.float64 + ) if isinstance(mapper, ABCSeries): # Since values were input this means we came from either diff --git a/pandas/core/construction.py b/pandas/core/construction.py index dc537d50b3419..b03c69d865301 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -4,7 +4,7 @@ These should not depend on core.internals. 
""" -from typing import Optional, Sequence, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np import numpy.ma as ma @@ -44,8 +44,13 @@ ) from pandas.core.dtypes.missing import isna +from pandas._typing import ArrayLike, Dtype import pandas.core.common as com +if TYPE_CHECKING: + from pandas.core.series import Series # noqa: F401 + from pandas.core.index import Index # noqa: F401 + def array( data: Sequence[object], @@ -565,3 +570,62 @@ def _try_cast( else: subarr = np.array(arr, dtype=object, copy=copy) return subarr + + +def is_empty_data(data: Any) -> bool: + """ + Utility to check if a Series is instantiated with empty data, + which does not contain dtype information. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. + + Returns + ------- + bool + """ + is_none = data is None + is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype") + is_simple_empty = is_list_like_without_dtype and not data + return is_none or is_simple_empty + + +def create_series_with_explicit_dtype( + data: Any = None, + index: Optional[Union[ArrayLike, "Index"]] = None, + dtype: Optional[Dtype] = None, + name: Optional[str] = None, + copy: bool = False, + fastpath: bool = False, + dtype_if_empty: Dtype = object, +) -> "Series": + """ + Helper to pass an explicit dtype when instantiating an empty Series. + + This silences a DeprecationWarning described in GitHub-17261. + + Parameters + ---------- + data : Mirrored from Series.__init__ + index : Mirrored from Series.__init__ + dtype : Mirrored from Series.__init__ + name : Mirrored from Series.__init__ + copy : Mirrored from Series.__init__ + fastpath : Mirrored from Series.__init__ + dtype_if_empty : str, numpy.dtype, or ExtensionDtype + This dtype will be passed explicitly if an empty Series will + be instantiated. 
+ + Returns + ------- + Series + """ + from pandas.core.series import Series + + if is_empty_data(data) and dtype is None: + dtype = dtype_if_empty + return Series( + data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath + ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 601dac3a1208b..c1616efabcdba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7956,7 +7956,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): cols = Index([], name=self.columns.name) if is_list_like(q): return self._constructor([], index=q, columns=cols) - return self._constructor_sliced([], index=cols, name=q) + return self._constructor_sliced([], index=cols, name=q, dtype=np.float64) result = data._data.quantile( qs=q, axis=1, interpolation=interpolation, transposed=is_transposed diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9aecd97194aad..efdcfa7edbba3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -72,6 +72,7 @@ import pandas.core.algorithms as algos from pandas.core.base import PandasObject, SelectionMixin import pandas.core.common as com +from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.index import ( Index, InvalidIndexError, @@ -6042,9 +6043,9 @@ def fillna( if self.ndim == 1: if isinstance(value, (dict, ABCSeries)): - from pandas import Series - - value = Series(value) + value = create_series_with_explicit_dtype( + value, dtype_if_empty=object + ) elif not is_list_like(value): pass else: @@ -6996,7 +6997,7 @@ def asof(self, where, subset=None): if not is_series: from pandas import Series - return Series(index=self.columns, name=where) + return Series(index=self.columns, name=where, dtype=np.float64) return np.nan # It's always much faster to use a *while* loop here for diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4726cdfb05a70..9bb0b8de9ba71 100644 --- a/pandas/core/groupby/generic.py 
+++ b/pandas/core/groupby/generic.py @@ -51,6 +51,7 @@ import pandas.core.algorithms as algorithms from pandas.core.base import DataError, SpecificationError import pandas.core.common as com +from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.frame import DataFrame from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame, _shared_docs from pandas.core.groupby import base @@ -259,7 +260,9 @@ def aggregate(self, func=None, *args, **kwargs): result = self._aggregate_named(func, *args, **kwargs) index = Index(sorted(result), name=self.grouper.names[0]) - ret = Series(result, index=index) + ret = create_series_with_explicit_dtype( + result, index=index, dtype_if_empty=object + ) if not self.as_index: # pragma: no cover print("Warning, ignoring as_index=True") @@ -407,7 +410,7 @@ def _wrap_transformed_output( def _wrap_applied_output(self, keys, values, not_indexed_same=False): if len(keys) == 0: # GH #6265 - return Series([], name=self._selection_name, index=keys) + return Series([], name=self._selection_name, index=keys, dtype=np.float64) def _get_index() -> Index: if self.grouper.nkeys > 1: @@ -493,7 +496,7 @@ def _transform_general(self, func, *args, **kwargs): result = concat(results).sort_index() else: - result = Series() + result = Series(dtype=np.float64) # we will only try to coerce the result type if # we have a numeric dtype, as these are *always* user-defined funcs @@ -1205,10 +1208,18 @@ def first_not_none(values): if v is None: return DataFrame() elif isinstance(v, NDFrame): - values = [ - x if x is not None else v._constructor(**v._construct_axes_dict()) - for x in values - ] + + # this is to silence a DeprecationWarning + # TODO: Remove when default dtype of empty Series is object + kwargs = v._construct_axes_dict() + if v._constructor is Series: + backup = create_series_with_explicit_dtype( + **kwargs, dtype_if_empty=object + ) + else: + backup = v._constructor(**kwargs) + + values = [x if (x is not None) else 
backup for x in values] v = values[0] diff --git a/pandas/core/series.py b/pandas/core/series.py index 537a960f7d463..efa3d33a2a79a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -54,7 +54,12 @@ from pandas.core.arrays.categorical import Categorical, CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor import pandas.core.common as com -from pandas.core.construction import extract_array, sanitize_array +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, + is_empty_data, + sanitize_array, +) from pandas.core.index import ( Float64Index, Index, @@ -177,7 +182,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame): def __init__( self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False ): - # we are called internally, so short-circuit if fastpath: @@ -191,6 +195,18 @@ def __init__( else: + if is_empty_data(data) and dtype is None: + # gh-17261 + warnings.warn( + "The default dtype for empty Series will be 'object' instead" + " of 'float64' in a future version. Specify a dtype explicitly" + " to silence this warning.", + DeprecationWarning, + stacklevel=2, + ) + # uncomment the line below when removing the DeprecationWarning + # dtype = np.dtype(object) + if index is not None: index = ensure_index(index) @@ -330,7 +346,11 @@ def _init_dict(self, data, index=None, dtype=None): keys, values = [], [] # Input is now list-like, so rely on "standard" construction: - s = Series(values, index=keys, dtype=dtype) + + # TODO: passing np.float64 to not break anything yet. 
See GH-17261 + s = create_series_with_explicit_dtype( + values, index=keys, dtype=dtype, dtype_if_empty=np.float64 + ) # Now we just make sure the order is respected, if any if data and index is not None: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 453d1cca2e085..3dfafd04dff0a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -145,7 +145,8 @@ def _maybe_cache(arg, format, cache, convert_listlike): """ from pandas import Series - cache_array = Series() + cache_array = Series(dtype=object) + if cache: # Perform a quicker unique check if not should_cache(arg): diff --git a/pandas/io/html.py b/pandas/io/html.py index b8cb6679a9562..c629c0bab7779 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.common import is_list_like -from pandas import Series +from pandas.core.construction import create_series_with_explicit_dtype from pandas.io.common import _is_url, _validate_header_arg, urlopen from pandas.io.formats.printing import pprint_thing @@ -762,7 +762,8 @@ def _parse_tfoot_tr(self, table): def _expand_elements(body): - lens = Series([len(elem) for elem in body]) + data = [len(elem) for elem in body] + lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) lens_max = lens.max() not_max = lens[lens != lens_max] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 89d5b52ffbf1e..30c1c2d59e983 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,4 +1,5 @@ from collections import OrderedDict +import functools from io import StringIO from itertools import islice import os @@ -14,6 +15,7 @@ from pandas import DataFrame, MultiIndex, Series, isna, to_datetime from pandas._typing import JSONSerializable +from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.reshape.concat import concat from pandas.io.common import ( @@ -1006,44 +1008,34 @@ class SeriesParser(Parser): _split_keys 
= ("name", "index", "data") def _parse_no_numpy(self): + data = loads(self.json, precise_float=self.precise_float) - json = self.json - orient = self.orient - if orient == "split": - decoded = { - str(k): v - for k, v in loads(json, precise_float=self.precise_float).items() - } + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} self.check_keys_split(decoded) - self.obj = Series(dtype=None, **decoded) + self.obj = create_series_with_explicit_dtype(**decoded) else: - self.obj = Series(loads(json, precise_float=self.precise_float), dtype=None) + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) def _parse_numpy(self): + load_kwargs = { + "dtype": None, + "numpy": True, + "precise_float": self.precise_float, + } + if self.orient in ["columns", "index"]: + load_kwargs["labelled"] = True + loads_ = functools.partial(loads, **load_kwargs) + data = loads_(self.json) - json = self.json - orient = self.orient - if orient == "split": - decoded = loads( - json, dtype=None, numpy=True, precise_float=self.precise_float - ) - decoded = {str(k): v for k, v in decoded.items()} + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} self.check_keys_split(decoded) - self.obj = Series(**decoded) - elif orient == "columns" or orient == "index": - self.obj = Series( - *loads( - json, - dtype=None, - numpy=True, - labelled=True, - precise_float=self.precise_float, - ) - ) + self.obj = create_series_with_explicit_dtype(**decoded) + elif self.orient in ["columns", "index"]: + self.obj = create_series_with_explicit_dtype(*data, dtype_if_empty=object) else: - self.obj = Series( - loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - ) + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) def _try_convert_types(self): if self.obj is None: diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 7bcca659ee3f6..deeeb0016142c 100644 --- 
a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -114,7 +114,7 @@ def maybe_color_bp(self, bp): def _make_plot(self): if self.subplots: - self._return_obj = pd.Series() + self._return_obj = pd.Series(dtype=object) for i, (label, y) in enumerate(self._iter_data()): ax = self._get_ax(i) @@ -405,7 +405,8 @@ def boxplot_frame_groupby( ) axes = _flatten(axes) - ret = pd.Series() + ret = pd.Series(dtype=object) + for (key, group), ax in zip(grouped, axes): d = group.boxplot( ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index dce3c4e4d5e98..da142fa0bd63c 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -77,7 +77,7 @@ def test_replace(to_replace, value, result): tm.assert_categorical_equal(cat, expected) -@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])]) +@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])]) def test_isin_empty(empty): s = pd.Categorical(["a", "b"]) expected = np.array([False, False], dtype=bool) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 75e86a2ee7ecc..3fb4e291d7d91 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -78,7 +78,7 @@ def coerce(request): ((x for x in [1, 2]), True, "generator"), ((_ for _ in []), True, "generator-empty"), (Series([1]), True, "Series"), - (Series([]), True, "Series-empty"), + (Series([], dtype=object), True, "Series-empty"), (Series(["a"]).str, True, "StringMethods"), (Series([], dtype="O").str, True, "StringMethods-empty"), (Index([1]), True, "Index"), @@ -139,7 +139,7 @@ def __getitem__(self): def test_is_array_like(): - assert inference.is_array_like(Series([])) + assert inference.is_array_like(Series([], dtype=object)) assert 
inference.is_array_like(Series([1, 2])) assert inference.is_array_like(np.array(["a", "b"])) assert inference.is_array_like(Index(["2016-01-01"])) @@ -165,7 +165,7 @@ class DtypeList(list): {"a": 1}, {1, "a"}, Series([1]), - Series([]), + Series([], dtype=object), Series(["a"]).str, (x for x in range(5)), ], @@ -1404,7 +1404,7 @@ def test_is_scalar_pandas_scalars(self): assert is_scalar(DateOffset(days=1)) def test_is_scalar_pandas_containers(self): - assert not is_scalar(Series()) + assert not is_scalar(Series(dtype=object)) assert not is_scalar(Series([1])) assert not is_scalar(DataFrame()) assert not is_scalar(DataFrame([[1]])) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 89474cf8fa953..5e7c6e4b48682 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -90,7 +90,8 @@ def test_isna_isnull(self, isna_f): assert not isna_f(-np.inf) # type - assert not isna_f(type(pd.Series())) + assert not isna_f(type(pd.Series(dtype=object))) + assert not isna_f(type(pd.Series(dtype=np.float64))) assert not isna_f(type(pd.DataFrame())) # series diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9a7cd4ace686f..716be92ebca3f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -2572,7 +2572,7 @@ def test_xs_corner(self): # no columns but Index(dtype=object) df = DataFrame(index=["a", "b", "c"]) result = df.xs("a") - expected = Series([], name="a", index=pd.Index([], dtype=object)) + expected = Series([], name="a", index=pd.Index([]), dtype=np.float64) tm.assert_series_equal(result, expected) def test_xs_duplicates(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 005ca8d95182e..5c14c3cd2a2b5 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1067,13 +1067,13 @@ def 
test_mean_mixed_datetime_numeric(self, tz): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) - def test_mean_excludeds_datetimes(self, tz): + def test_mean_excludes_datetimes(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 # Our long-term desired behavior is unclear, but the behavior in # 0.24.0rc1 was buggy. df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2}) result = df.mean() - expected = pd.Series() + expected = pd.Series(dtype=np.float64) tm.assert_series_equal(result, expected) def test_mean_mixed_string_decimal(self): @@ -1907,7 +1907,7 @@ def test_isin(self): expected = DataFrame([df.loc[s].isin(other) for s in df.index]) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) def test_isin_empty(self, empty): # GH 16991 df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 26a3c738750ca..eb98bdc49f976 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -105,13 +105,15 @@ def test_apply_with_reduce_empty(self): result = empty_frame.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_frame) result = empty_frame.apply(x.append, axis=1, result_type="reduce") - tm.assert_series_equal(result, Series([], index=pd.Index([], dtype=object))) + expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) empty_with_cols = DataFrame(columns=["a", "b", "c"]) result = empty_with_cols.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_with_cols) result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") - tm.assert_series_equal(result, Series([], index=pd.Index([], dtype=object))) + expected = Series([], index=pd.Index([], 
dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) # Ensure that x.append hasn't been called assert x == [] @@ -134,7 +136,7 @@ def test_nunique_empty(self): tm.assert_series_equal(result, expected) result = df.T.nunique() - expected = Series([], index=pd.Index([])) + expected = Series([], index=pd.Index([]), dtype=np.float64) tm.assert_series_equal(result, expected) def test_apply_standard_nonunique(self): @@ -1284,16 +1286,16 @@ def func(group_col): _get_cython_table_params( DataFrame(), [ - ("sum", Series()), - ("max", Series()), - ("min", Series()), + ("sum", Series(dtype="float64")), + ("max", Series(dtype="float64")), + ("min", Series(dtype="float64")), ("all", Series(dtype=bool)), ("any", Series(dtype=bool)), - ("mean", Series()), - ("prod", Series()), - ("std", Series()), - ("var", Series()), - ("median", Series()), + ("mean", Series(dtype="float64")), + ("prod", Series(dtype="float64")), + ("std", Series(dtype="float64")), + ("var", Series(dtype="float64")), + ("median", Series(dtype="float64")), ], ), _get_cython_table_params( diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 88bd5a4fedfae..f6e203afb0898 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -470,7 +470,7 @@ def test_arith_flex_series(self, simple_frame): def test_arith_flex_zero_len_raises(self): # GH 19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases - ser_len0 = pd.Series([]) + ser_len0 = pd.Series([], dtype=object) df_len0 = pd.DataFrame(columns=["A", "B"]) df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 9a7d806c79dc3..89be3779e5748 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -67,7 +67,9 @@ def test_missing(self, date_range_frame): df = date_range_frame.iloc[:N].copy() result = 
df.asof("1989-12-31") - expected = Series(index=["A", "B"], name=Timestamp("1989-12-31")) + expected = Series( + index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 + ) tm.assert_series_equal(result, expected) result = df.asof(to_datetime(["1989-12-31"])) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ce0ebdbe56354..08dbeb9e585f1 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -25,6 +25,7 @@ date_range, isna, ) +from pandas.core.construction import create_series_with_explicit_dtype import pandas.util.testing as tm MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] @@ -1216,7 +1217,9 @@ def test_constructor_list_of_series(self): OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), OrderedDict([["b", 3], ["c", 4], ["d", 6]]), ] - data = [Series(d) for d in data] + data = [ + create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data + ] result = DataFrame(data) sdict = OrderedDict(zip(range(len(data)), data)) @@ -1226,7 +1229,7 @@ def test_constructor_list_of_series(self): result2 = DataFrame(data, index=np.arange(6)) tm.assert_frame_equal(result, result2) - result = DataFrame([Series()]) + result = DataFrame([Series(dtype=object)]) expected = DataFrame(index=[0]) tm.assert_frame_equal(result, expected) @@ -1450,7 +1453,7 @@ def test_constructor_Series_named(self): DataFrame(s, columns=[1, 2]) # #2234 - a = Series([], name="x") + a = Series([], name="x", dtype=object) df = DataFrame(a) assert df.columns[0] == "x" @@ -2356,11 +2359,11 @@ def test_from_records_series_list_dict(self): def test_to_frame_with_falsey_names(self): # GH 16114 - result = Series(name=0).to_frame().dtypes - expected = Series({0: np.float64}) + result = Series(name=0, dtype=object).to_frame().dtypes + expected = Series({0: object}) tm.assert_series_equal(result, expected) - result = DataFrame(Series(name=0)).dtypes + result = DataFrame(Series(name=0, 
dtype=object)).dtypes tm.assert_series_equal(result, expected) @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 6709cdcb1eebf..d8d56e90a2f31 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -656,8 +656,8 @@ def test_astype_dict_like(self, dtype_class): # GH 16717 # if dtypes provided is empty, the resulting DataFrame # should be the same as the original DataFrame - dt7 = dtype_class({}) - result = df.astype(dt7) + dt7 = dtype_class({}) if dtype_class is dict else dtype_class({}, dtype=object) + equiv = df.astype(dt7) tm.assert_frame_equal(df, equiv) tm.assert_frame_equal(df, original) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 78953d43677fc..5ca7dd32200ee 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -472,7 +472,7 @@ def test_quantile_empty_no_columns(self): df = pd.DataFrame(pd.date_range("1/1/18", periods=5)) df.columns.name = "captain tightpants" result = df.quantile(0.5) - expected = pd.Series([], index=[], name=0.5) + expected = pd.Series([], index=[], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 434ea6ea7b4f0..3b01ae0c3c2e8 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1251,7 +1251,7 @@ def test_replace_with_empty_dictlike(self, mix_abc): # GH 15289 df = DataFrame(mix_abc) tm.assert_frame_equal(df, df.replace({})) - tm.assert_frame_equal(df, df.replace(Series([]))) + tm.assert_frame_equal(df, df.replace(Series([], dtype=object))) tm.assert_frame_equal(df, df.replace({"b": {}})) tm.assert_frame_equal(df, df.replace(Series({"b": {}}))) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py 
index 0912a8901dc6a..0ff9d7fcdb209 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -33,6 +33,7 @@ def _construct(self, shape, value=None, dtype=None, **kwargs): if is_scalar(value): if value == "empty": arr = None + dtype = np.float64 # remove the info axis kwargs.pop(self._typ._info_axis_name, None) @@ -732,13 +733,10 @@ def test_squeeze(self): tm.assert_series_equal(df.squeeze(), df["A"]) # don't fail with 0 length dimensions GH11229 & GH8999 - empty_series = Series([], name="five") + empty_series = Series([], name="five", dtype=np.float64) empty_frame = DataFrame([empty_series]) - - [ - tm.assert_series_equal(empty_series, higher_dim.squeeze()) - for higher_dim in [empty_series, empty_frame] - ] + tm.assert_series_equal(empty_series, empty_series.squeeze()) + tm.assert_series_equal(empty_series, empty_frame.squeeze()) # axis argument df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] @@ -898,10 +896,10 @@ def test_equals(self): # GH 8437 a = pd.Series([False, np.nan]) b = pd.Series([False, np.nan]) - c = pd.Series(index=range(2)) - d = pd.Series(index=range(2)) - e = pd.Series(index=range(2)) - f = pd.Series(index=range(2)) + c = pd.Series(index=range(2), dtype=object) + d = c.copy() + e = c.copy() + f = c.copy() c[:-1] = d[:-1] = e[0] = f[0] = False assert a.equals(a) assert a.equals(b) @@ -940,7 +938,7 @@ def test_pipe_tuple_error(self): @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) def test_axis_classmethods(self, box): - obj = box() + obj = box(dtype=object) values = ( list(box._AXIS_NAMES.keys()) + list(box._AXIS_NUMBERS.keys()) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 096a5aa99bd80..aaf523956aaed 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -224,7 +224,7 @@ def test_to_xarray_index_types(self, index): def test_to_xarray(self): from xarray import DataArray - s = Series([]) + s = Series([], dtype=object) 
s.index.name = "foo" result = s.to_xarray() assert len(result) == 0 diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 9882f12714d2d..8e9554085b9ee 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -20,7 +20,7 @@ def test_cumcount(self): def test_cumcount_empty(self): ge = DataFrame().groupby(level=0) - se = Series().groupby(level=0) + se = Series(dtype=object).groupby(level=0) # edge case, as this is usually considered float e = Series(dtype="int64") @@ -95,7 +95,7 @@ def test_ngroup_one_group(self): def test_ngroup_empty(self): ge = DataFrame().groupby(level=0) - se = Series().groupby(level=0) + se = Series(dtype=object).groupby(level=0) # edge case, as this is usually considered float e = Series(dtype="int64") diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 2ce04fc774083..b3ee12b6691d7 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -593,5 +593,5 @@ def test_filter_dropna_with_empty_groups(): tm.assert_series_equal(result_false, expected_false) result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True) - expected_true = pd.Series(index=pd.Index([], dtype=int)) + expected_true = pd.Series(index=pd.Index([], dtype=int), dtype=np.float64) tm.assert_series_equal(result_true, expected_true) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index c41f762e9128d..4ca23c61ba920 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1047,7 +1047,7 @@ def test_nunique_with_object(): def test_nunique_with_empty_series(): # GH 12553 - data = pd.Series(name="name") + data = pd.Series(name="name", dtype=object) result = data.groupby(level=0).nunique() expected = pd.Series(name="name", dtype="int64") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py 
b/pandas/tests/groupby/test_grouping.py index e4edc64016567..2c84c2f034fc6 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -585,9 +585,18 @@ def test_list_grouper_with_nat(self): @pytest.mark.parametrize( "func,expected", [ - ("transform", pd.Series(name=2, index=pd.RangeIndex(0, 0, 1))), - ("agg", pd.Series(name=2, index=pd.Float64Index([], name=1))), - ("apply", pd.Series(name=2, index=pd.Float64Index([], name=1))), + ( + "transform", + pd.Series(name=2, dtype=np.float64, index=pd.RangeIndex(0, 0, 1)), + ), + ( + "agg", + pd.Series(name=2, dtype=np.float64, index=pd.Float64Index([], name=1)), + ), + ( + "apply", + pd.Series(name=2, dtype=np.float64, index=pd.Float64Index([], name=1)), + ), ], ) def test_evaluate_with_empty_groups(self, func, expected): @@ -602,7 +611,7 @@ def test_evaluate_with_empty_groups(self, func, expected): def test_groupby_empty(self): # https://github.com/pandas-dev/pandas/issues/27190 - s = pd.Series([], name="name") + s = pd.Series([], name="name", dtype="float64") gr = s.groupby([]) result = gr.mean() @@ -731,7 +740,7 @@ def test_get_group_grouped_by_tuple(self): def test_groupby_with_empty(self): index = pd.DatetimeIndex(()) data = () - series = pd.Series(data, index) + series = pd.Series(data, index, dtype=object) grouper = pd.Grouper(freq="D") grouped = series.groupby(grouper) assert next(iter(grouped), None) is None diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 42244626749b9..6eedfca129856 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -72,7 +72,7 @@ def test_map_callable(self): "mapper", [ lambda values, index: {i: e for e, i in zip(values, index)}, - lambda values, index: pd.Series(values, index), + lambda values, index: pd.Series(values, index, dtype=object), ], ) def test_map_dictlike(self, mapper): diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py 
b/pandas/tests/indexes/datetimes/test_arithmetic.py index 4851dd5a55c1e..2bcaa973acd6b 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -100,9 +100,9 @@ def test_dti_shift_localized(self, tzstr): def test_dti_shift_across_dst(self): # GH 8616 idx = date_range("2013-11-03", tz="America/Chicago", periods=7, freq="H") - s = Series(index=idx[:-1]) + s = Series(index=idx[:-1], dtype=object) result = s.shift(freq="H") - expected = Series(index=idx[1:]) + expected = Series(index=idx[1:], dtype=object) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c0c677b076e2c..e62d50f64d8ff 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2001,7 +2001,7 @@ def test_isin_level_kwarg_bad_label_raises(self, label, indices): with pytest.raises(KeyError, match=msg): index.isin([], level=label) - @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) def test_isin_empty(self, empty): # see gh-16991 index = Index(["a", "b"]) diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index db6dddfdca11b..e5b2c83f29030 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -93,7 +93,7 @@ def setup_method(self, method): self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev) self.frame_empty = DataFrame() - self.series_empty = Series() + self.series_empty = Series(dtype=object) # form agglomerates for kind in self._kinds: diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 76425c72ce4f9..b6b9f7f205394 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -48,7 +48,9 @@ def test_loc_getitem_series(self): empty = 
Series(data=[], dtype=np.float64) expected = Series( - [], index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64) + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype=np.float64, ) result = x.loc[empty] tm.assert_series_equal(result, expected) @@ -70,7 +72,9 @@ def test_loc_getitem_array(self): # empty array: empty = np.array([]) expected = Series( - [], index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64) + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype="float64", ) result = x.loc[empty] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index e4d387fd3ac38..f9bded5b266f1 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -286,7 +286,7 @@ def test_iloc_getitem_dups(self): def test_iloc_getitem_array(self): # array like - s = Series(index=range(1, 4)) + s = Series(index=range(1, 4), dtype=object) self.check_result( "iloc", s.index, @@ -499,7 +499,7 @@ def test_iloc_getitem_frame(self): tm.assert_frame_equal(result, expected) # with index-like - s = Series(index=range(1, 5)) + s = Series(index=range(1, 5), dtype=object) result = df.iloc[s.index] with catch_warnings(record=True): filterwarnings("ignore", "\\n.ix", FutureWarning) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 25b8713eb0307..d75afd1540f22 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -895,7 +895,7 @@ def test_range_in_series_indexing(self): # range can cause an indexing error # GH 11652 for x in [5, 999999, 1000000]: - s = Series(index=range(x)) + s = Series(index=range(x), dtype=np.float64) s.loc[range(1)] = 42 tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 
cb523efb78cf4..e5e899bfb7f0d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -217,7 +217,7 @@ def test_loc_getitem_label_array_like(self): # array like self.check_result( "loc", - Series(index=[0, 2, 4]).index, + Series(index=[0, 2, 4], dtype=object).index, "ix", [0, 2, 4], typs=["ints", "uints"], @@ -225,7 +225,7 @@ def test_loc_getitem_label_array_like(self): ) self.check_result( "loc", - Series(index=[3, 6, 9]).index, + Series(index=[3, 6, 9], dtype=object).index, "ix", [3, 6, 9], typs=["ints", "uints"], @@ -282,7 +282,7 @@ def test_loc_to_fail(self): # GH 7496 # loc should not fallback - s = Series() + s = Series(dtype=object) s.loc[1] = 1 s.loc["a"] = 2 @@ -794,13 +794,13 @@ def test_setitem_new_key_tz(self): ] expected = pd.Series(vals, index=["foo", "bar"]) - ser = pd.Series() + ser = pd.Series(dtype=object) ser["foo"] = vals[0] ser["bar"] = vals[1] tm.assert_series_equal(ser, expected) - ser = pd.Series() + ser = pd.Series(dtype=object) ser.loc["foo"] = vals[0] ser.loc["bar"] = vals[1] @@ -1016,7 +1016,7 @@ def test_loc_reverse_assignment(self): data = [1, 2, 3, 4, 5, 6] + [None] * 4 expected = Series(data, index=range(2010, 2020)) - result = pd.Series(index=range(2010, 2020)) + result = pd.Series(index=range(2010, 2020), dtype=np.float64) result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index aa49edd51aa39..3adc206335e6f 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -368,19 +368,19 @@ def test_partial_set_empty_series(self): # GH5226 # partially set with an empty object series - s = Series() + s = Series(dtype=object) s.loc[1] = 1 tm.assert_series_equal(s, Series([1], index=[1])) s.loc[3] = 3 tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) - s = Series() + s = Series(dtype=object) s.loc[1] = 1.0 tm.assert_series_equal(s, 
Series([1.0], index=[1])) s.loc[3] = 3.0 tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3])) - s = Series() + s = Series(dtype=object) s.loc["foo"] = 1 tm.assert_series_equal(s, Series([1], index=["foo"])) s.loc["bar"] = 3 @@ -512,11 +512,11 @@ def test_partial_set_empty_frame_row(self): def test_partial_set_empty_frame_set_series(self): # GH 5756 # setting with empty Series - df = DataFrame(Series()) - tm.assert_frame_equal(df, DataFrame({0: Series()})) + df = DataFrame(Series(dtype=object)) + tm.assert_frame_equal(df, DataFrame({0: Series(dtype=object)})) - df = DataFrame(Series(name="foo")) - tm.assert_frame_equal(df, DataFrame({"foo": Series()})) + df = DataFrame(Series(name="foo", dtype=object)) + tm.assert_frame_equal(df, DataFrame({"foo": Series(dtype=object)})) def test_partial_set_empty_frame_empty_copy_assignment(self): # GH 5932 diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 004a1d184537d..e875a6f137d80 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1017,7 +1017,7 @@ def test_east_asian_unicode_true(self): def test_to_string_buffer_all_unicode(self): buf = StringIO() - empty = DataFrame({"c/\u03c3": Series()}) + empty = DataFrame({"c/\u03c3": Series(dtype=object)}) nonempty = DataFrame({"c/\u03c3": Series([1, 2, 3])}) print(empty, file=buf) @@ -2765,7 +2765,7 @@ def test_to_string_length(self): assert res == exp def test_to_string_na_rep(self): - s = pd.Series(index=range(100)) + s = pd.Series(index=range(100), dtype=np.float64) res = s.to_string(na_rep="foo", max_rows=2) exp = "0 foo\n ..\n99 foo" assert res == exp diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d31aa04b223e8..bce3d1de849aa 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -53,7 +53,7 @@ def setup(self, datapath): self.objSeries = tm.makeObjectSeries() self.objSeries.name = "objects" - 
self.empty_series = Series([], index=[]) + self.empty_series = Series([], index=[], dtype=np.float64) self.empty_frame = DataFrame() self.frame = _frame.copy() diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index d79280f9ea494..d9a76fe97f813 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -2376,8 +2376,8 @@ def test_frame(self, compression, setup_path): @td.xfail_non_writeable def test_empty_series_frame(self, setup_path): - s0 = Series() - s1 = Series(name="myseries") + s0 = Series(dtype=object) + s1 = Series(name="myseries", dtype=object) df0 = DataFrame() df1 = DataFrame(index=["a", "b", "c"]) df2 = DataFrame(columns=["d", "e", "f"]) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 353946a311c1a..c34f2ebace683 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -395,8 +395,7 @@ def test_empty_tables(self): """ Make sure that read_html ignores empty tables. """ - result = self.read_html( - """ + html = """ @@ -416,8 +415,7 @@ def test_empty_tables(self):
""" - ) - + result = self.read_html(html) assert len(result) == 1 def test_multiple_tbody(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 1e59fbf928876..9e947d4ba878a 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -34,7 +34,7 @@ def test_get_accessor_args(): msg = "should not be called with positional arguments" with pytest.raises(TypeError, match=msg): - func(backend_name="", data=Series(), args=["line", None], kwargs={}) + func(backend_name="", data=Series(dtype=object), args=["line", None], kwargs={}) x, y, kind, kwargs = func( backend_name="", @@ -48,7 +48,10 @@ def test_get_accessor_args(): assert kwargs == {"grid": False} x, y, kind, kwargs = func( - backend_name="pandas.plotting._matplotlib", data=Series(), args=[], kwargs={} + backend_name="pandas.plotting._matplotlib", + data=Series(dtype=object), + args=[], + kwargs={}, ) assert x is None assert y is None diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 80d148c919ab2..3f78a6ac4a778 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -79,7 +79,7 @@ def test_nanops(self): assert pd.isna(getattr(obj, opname)()) assert pd.isna(getattr(obj, opname)(skipna=False)) - obj = klass([]) + obj = klass([], dtype=object) assert pd.isna(getattr(obj, opname)()) assert pd.isna(getattr(obj, opname)(skipna=False)) @@ -528,7 +528,7 @@ def test_empty(self, method, unit, use_bottleneck): with pd.option_context("use_bottleneck", use_bottleneck): # GH#9422 / GH#18921 # Entirely empty - s = Series([]) + s = Series([], dtype=object) # NA by default result = getattr(s, method)() assert result == unit @@ -900,7 +900,7 @@ def test_timedelta64_analytics(self): @pytest.mark.parametrize( "test_input,error_type", [ - (pd.Series([]), ValueError), + (pd.Series([], dtype="float64"), ValueError), # For strings, or any Series with dtype 
'O' (pd.Series(["foo", "bar", "baz"]), TypeError), (pd.Series([(1,), (2,)]), TypeError), diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 161581e16b6fe..622b85f2a398c 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -139,7 +139,7 @@ def test_resample_empty_dataframe(empty_frame, freq, resample_method): expected = df.copy() else: # GH14962 - expected = Series([]) + expected = Series([], dtype=object) if isinstance(df.index, PeriodIndex): expected.index = df.index.asfreq(freq=freq) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index f9229e8066be4..5837d526e3978 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1429,10 +1429,11 @@ def test_downsample_across_dst_weekly(): tm.assert_frame_equal(result, expected) idx = pd.date_range("2013-04-01", "2013-05-01", tz="Europe/London", freq="H") - s = Series(index=idx) + s = Series(index=idx, dtype=np.float64) result = s.resample("W").mean() expected = Series( - index=pd.date_range("2013-04-07", freq="W", periods=5, tz="Europe/London") + index=pd.date_range("2013-04-07", freq="W", periods=5, tz="Europe/London"), + dtype=np.float64, ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 93ce7a9480b35..219491367d292 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -594,7 +594,7 @@ def test_resample_with_dst_time_change(self): def test_resample_bms_2752(self): # GH2753 - foo = Series(index=pd.bdate_range("20000101", "20000201")) + foo = Series(index=pd.bdate_range("20000101", "20000201"), dtype=np.float64) res1 = foo.resample("BMS").mean() res2 = foo.resample("BMS").mean().resample("B").mean() assert res1.index[0] == Timestamp("20000103") diff --git 
a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 63f1ef7595f31..8ef35882dcc12 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -27,6 +27,7 @@ isna, read_csv, ) +from pandas.core.construction import create_series_with_explicit_dtype from pandas.tests.extension.decimal import to_decimal import pandas.util.testing as tm @@ -2177,7 +2178,7 @@ def test_concat_period_other_series(self): def test_concat_empty_series(self): # GH 11082 s1 = pd.Series([1, 2, 3], name="x") - s2 = pd.Series(name="y") + s2 = pd.Series(name="y", dtype="float64") res = pd.concat([s1, s2], axis=1) exp = pd.DataFrame( {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, @@ -2186,7 +2187,7 @@ def test_concat_empty_series(self): tm.assert_frame_equal(res, exp) s1 = pd.Series([1, 2, 3], name="x") - s2 = pd.Series(name="y") + s2 = pd.Series(name="y", dtype="float64") res = pd.concat([s1, s2], axis=0) # name will be reset exp = pd.Series([1, 2, 3]) @@ -2194,7 +2195,7 @@ def test_concat_empty_series(self): # empty Series with no name s1 = pd.Series([1, 2, 3], name="x") - s2 = pd.Series(name=None) + s2 = pd.Series(name=None, dtype="float64") res = pd.concat([s1, s2], axis=1) exp = pd.DataFrame( {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, @@ -2209,7 +2210,9 @@ def test_concat_empty_series_timelike(self, tz, values): # GH 18447 first = Series([], dtype="M8[ns]").dt.tz_localize(tz) - second = Series(values) + dtype = None if values else np.float64 + second = Series(values, dtype=dtype) + expected = DataFrame( { 0: pd.Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz), @@ -2569,7 +2572,8 @@ def test_concat_odered_dict(self): @pytest.mark.parametrize("dt", np.sctypes["float"]) def test_concat_no_unnecessary_upcast(dt, pdt): # GH 13247 - dims = pdt().ndim + dims = pdt(dtype=object).ndim + dfs = [ pdt(np.array([1], dtype=dt, ndmin=dims)), pdt(np.array([np.nan], dtype=dt, ndmin=dims)), @@ -2579,7 +2583,7 @@ def 
test_concat_no_unnecessary_upcast(dt, pdt): assert x.values.dtype == dt -@pytest.mark.parametrize("pdt", [pd.Series, pd.DataFrame]) +@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, pd.DataFrame]) @pytest.mark.parametrize("dt", np.sctypes["int"]) def test_concat_will_upcast(dt, pdt): with catch_warnings(record=True): @@ -2605,7 +2609,8 @@ def test_concat_empty_and_non_empty_frame_regression(): def test_concat_empty_and_non_empty_series_regression(): # GH 18187 regression test s1 = pd.Series([1]) - s2 = pd.Series([]) + s2 = pd.Series([], dtype=object) + expected = s1 result = pd.concat([s1, s2]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 7509d21b8832f..c47b99fa38989 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -230,7 +230,7 @@ def test_reindex_with_datetimes(): def test_reindex_corner(datetime_series): # (don't forget to fix this) I think it's fixed - empty = Series() + empty = Series(dtype=object) empty.reindex(datetime_series.index, method="pad") # it works # corner case: pad empty series @@ -539,8 +539,9 @@ def test_drop_with_ignore_errors(): def test_drop_empty_list(index, drop_labels): # GH 21494 expected_index = [i for i in index if i not in drop_labels] - series = pd.Series(index=index).drop(drop_labels) - tm.assert_series_equal(series, pd.Series(index=expected_index)) + series = pd.Series(index=index, dtype=object).drop(drop_labels) + expected = pd.Series(index=expected_index, dtype=object) + tm.assert_series_equal(series, expected) @pytest.mark.parametrize( @@ -554,4 +555,5 @@ def test_drop_empty_list(index, drop_labels): def test_drop_non_empty_list(data, index, drop_labels): # GH 21494 and GH 16877 with pytest.raises(KeyError, match="not found in axis"): - pd.Series(data=data, index=index).drop(drop_labels) + dtype = object if data is None else 
None + pd.Series(data=data, index=index, dtype=dtype).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index fab3310fa3dfe..83c1c0ff16f4c 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -105,7 +105,7 @@ def test_series_set_value(): dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] index = DatetimeIndex(dates) - s = Series()._set_value(dates[0], 1.0) + s = Series(dtype=object)._set_value(dates[0], 1.0) s2 = s._set_value(dates[1], np.nan) expected = Series([1.0, np.nan], index=index) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 173bc9d9d6409..5bebd480ce8d4 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -105,7 +105,9 @@ def test_getitem_get(datetime_series, string_series, object_series): # None # GH 5652 - for s in [Series(), Series(index=list("abc"))]: + s1 = Series(dtype=object) + s2 = Series(dtype=object, index=list("abc")) + for s in [s1, s2]: result = s.get(None) assert result is None @@ -130,7 +132,7 @@ def test_getitem_generator(string_series): def test_type_promotion(): # GH12599 - s = pd.Series() + s = pd.Series(dtype=object) s["a"] = pd.Timestamp("2016-01-01") s["b"] = 3.0 s["c"] = "foo" @@ -168,7 +170,7 @@ def test_getitem_out_of_bounds(datetime_series): datetime_series[len(datetime_series)] # GH #917 - s = Series([]) + s = Series([], dtype=object) with pytest.raises(IndexError, match=msg): s[-1] @@ -324,12 +326,12 @@ def test_setitem(datetime_series, string_series): # Test for issue #10193 key = pd.Timestamp("2012-01-01") - series = pd.Series() + series = pd.Series(dtype=object) series[key] = 47 expected = pd.Series(47, [key]) tm.assert_series_equal(series, expected) - series = pd.Series([], pd.DatetimeIndex([], freq="D")) + series = pd.Series([], pd.DatetimeIndex([], freq="D"), 
dtype=object) series[key] = 47 expected = pd.Series(47, pd.DatetimeIndex([key], freq="D")) tm.assert_series_equal(series, expected) @@ -637,7 +639,7 @@ def test_setitem_na(): def test_timedelta_assignment(): # GH 8209 - s = Series([]) + s = Series([], dtype=object) s.loc["B"] = timedelta(1) tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"])) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 426a98b00827e..a641b47f2e690 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -150,7 +150,7 @@ def test_delitem(): tm.assert_series_equal(s, expected) # empty - s = Series() + s = Series(dtype=object) with pytest.raises(KeyError, match=r"^0$"): del s[0] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index fe9306a06efc7..71b4819bb4da8 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -843,7 +843,7 @@ def test_isin_with_i8(self): result = s.isin(s[0:2]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) def test_isin_empty(self, empty): # see gh-16991 s = Series(["a", "b"]) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8acab3fa2541d..5da0ee9b5b1c0 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -266,7 +266,7 @@ def get_dir(s): ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. 
- s = pd.Series(index=index) + s = pd.Series(index=index, dtype=object) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: @@ -275,7 +275,7 @@ def test_index_tab_completion(self, index): assert x not in dir_s def test_not_hashable(self): - s_empty = Series() + s_empty = Series(dtype=object) s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): @@ -474,10 +474,11 @@ def test_str_attribute(self): s.str.repeat(2) def test_empty_method(self): - s_empty = pd.Series() + s_empty = pd.Series(dtype=object) assert s_empty.empty - for full_series in [pd.Series([1]), pd.Series(index=[1])]: + s2 = pd.Series(index=[1], dtype=object) + for full_series in [pd.Series([1]), s2]: assert not full_series.empty def test_tab_complete_warning(self, ip): diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index eb4f3273f8713..8956b8b0b2d20 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -37,7 +37,7 @@ def test_apply(self, datetime_series): assert s.name == rs.name # index but no data - s = Series(index=[1, 2, 3]) + s = Series(index=[1, 2, 3], dtype=np.float64) rs = s.apply(lambda x: x) tm.assert_series_equal(s, rs) @@ -340,7 +340,7 @@ def test_non_callable_aggregates(self): "series, func, expected", chain( _get_cython_table_params( - Series(), + Series(dtype=np.float64), [ ("sum", 0), ("max", np.nan), @@ -395,8 +395,11 @@ def test_agg_cython_table(self, series, func, expected): "series, func, expected", chain( _get_cython_table_params( - Series(), - [("cumprod", Series([], Index([]))), ("cumsum", Series([], Index([])))], + Series(dtype=np.float64), + [ + ("cumprod", Series([], Index([]), dtype=np.float64)), + ("cumsum", Series([], Index([]), dtype=np.float64)), + ], ), _get_cython_table_params( Series([np.nan, 1, 2, 3]), diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 
9d02c1bdc2d9c..c6f4ce364f328 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -107,7 +107,8 @@ def test_combine_first(self): # corner case s = Series([1.0, 2, 3], index=[0, 1, 2]) - result = s.combine_first(Series([], index=[])) + empty = Series([], index=[], dtype=object) + result = s.combine_first(empty) s.index = s.index.astype("O") tm.assert_series_equal(s, result) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 34b11a0d008aa..293ec9580436e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -52,8 +52,10 @@ class TestSeriesConstructors: ], ) def test_empty_constructor(self, constructor, check_index_type): - expected = Series() - result = constructor() + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + expected = Series() + result = constructor() + assert len(result.index) == 0 tm.assert_series_equal(result, expected, check_index_type=check_index_type) @@ -76,8 +78,8 @@ def test_scalar_conversion(self): assert int(Series([1.0])) == 1 def test_constructor(self, datetime_series): - empty_series = Series() - + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty_series = Series() assert datetime_series.index.is_all_dates # Pass in Series @@ -94,7 +96,8 @@ def test_constructor(self, datetime_series): assert mixed[1] is np.NaN assert not empty_series.index.is_all_dates - assert not Series().index.is_all_dates + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + assert not Series().index.is_all_dates # exception raised is of type Exception with pytest.raises(Exception, match="Data must be 1-dimensional"): @@ -113,8 +116,9 @@ def test_constructor(self, datetime_series): @pytest.mark.parametrize("input_class", [list, dict, OrderedDict]) def test_constructor_empty(self, input_class): - empty = Series() - empty2 = 
Series(input_class()) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty = Series() + empty2 = Series(input_class()) # these are Index() and RangeIndex() which don't compare type equal # but are just .equals @@ -132,8 +136,9 @@ def test_constructor_empty(self, input_class): if input_class is not list: # With index: - empty = Series(index=range(10)) - empty2 = Series(input_class(), index=range(10)) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty = Series(index=range(10)) + empty2 = Series(input_class(), index=range(10)) tm.assert_series_equal(empty, empty2) # With index and dtype float64: @@ -165,7 +170,8 @@ def test_constructor_dtype_only(self, dtype, index): assert len(result) == 0 def test_constructor_no_data_index_order(self): - result = pd.Series(index=["b", "a", "c"]) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + result = pd.Series(index=["b", "a", "c"]) assert result.index.tolist() == ["b", "a", "c"] def test_constructor_no_data_string_type(self): @@ -631,7 +637,8 @@ def test_constructor_limit_copies(self, index): assert s._data.blocks[0].values is not index def test_constructor_pass_none(self): - s = Series(None, index=range(5)) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + s = Series(None, index=range(5)) assert s.dtype == np.float64 s = Series(None, index=range(5), dtype=object) @@ -639,8 +646,9 @@ def test_constructor_pass_none(self): # GH 7431 # inference on the index - s = Series(index=np.array([None])) - expected = Series(index=Index([None])) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + s = Series(index=np.array([None])) + expected = Series(index=Index([None])) tm.assert_series_equal(s, expected) def test_constructor_pass_nan_nat(self): @@ -1029,7 +1037,7 @@ def test_constructor_dict(self): pidx = tm.makePeriodIndex(100) d = {pidx[0]: 0, pidx[1]: 1} result = Series(d, 
index=pidx) - expected = Series(np.nan, pidx) + expected = Series(np.nan, pidx, dtype=np.float64) expected.iloc[0] = 0 expected.iloc[1] = 1 tm.assert_series_equal(result, expected) @@ -1135,7 +1143,7 @@ def test_fromDict(self): def test_fromValue(self, datetime_series): - nans = Series(np.NaN, index=datetime_series.index) + nans = Series(np.NaN, index=datetime_series.index, dtype=np.float64) assert nans.dtype == np.float_ assert len(nans) == len(datetime_series) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 065be966efa49..22b00425abb6b 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -205,7 +205,11 @@ def test_astype_dict_like(self, dtype_class): # GH16717 # if dtypes provided is empty, it should error - dt5 = dtype_class({}) + if dtype_class is Series: + dt5 = dtype_class({}, dtype=object) + else: + dt5 = dtype_class({}) + with pytest.raises(KeyError, match=msg): s.astype(dt5) @@ -408,7 +412,8 @@ def test_astype_empty_constructor_equality(self, dtype): "m", # Generic timestamps raise a ValueError. Already tested. 
): init_empty = Series([], dtype=dtype) - as_type_empty = Series([]).astype(dtype) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + as_type_empty = Series([]).astype(dtype) tm.assert_series_equal(init_empty, as_type_empty) def test_arg_for_errors_in_astype(self): @@ -472,7 +477,9 @@ def test_infer_objects_series(self): tm.assert_series_equal(actual, expected) def test_is_homogeneous_type(self): - assert Series()._is_homogeneous_type + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty = Series() + assert empty._is_homogeneous_type assert Series([1, 2])._is_homogeneous_type assert Series(pd.Categorical([1, 2]))._is_homogeneous_type diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py index 0f7e3e307ed19..666354e70bdd4 100644 --- a/pandas/tests/series/test_duplicates.py +++ b/pandas/tests/series/test_duplicates.py @@ -2,6 +2,7 @@ import pytest from pandas import Categorical, Series +from pandas.core.construction import create_series_with_explicit_dtype import pandas.util.testing as tm @@ -70,7 +71,7 @@ def test_unique_data_ownership(): ) def test_is_unique(data, expected): # GH11946 / GH25180 - s = Series(data) + s = create_series_with_explicit_dtype(data, dtype_if_empty=object) assert s.is_unique is expected diff --git a/pandas/tests/series/test_explode.py b/pandas/tests/series/test_explode.py index 6262da6bdfabf..e79d3c0556cf1 100644 --- a/pandas/tests/series/test_explode.py +++ b/pandas/tests/series/test_explode.py @@ -29,7 +29,7 @@ def test_mixed_type(): def test_empty(): - s = pd.Series() + s = pd.Series(dtype=object) result = s.explode() expected = s.copy() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c5ce125d10ac2..72f08876e71ae 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -710,7 +710,7 @@ def test_fillna(self, 
datetime_series): tm.assert_series_equal(result, expected) result = s1.fillna({}) tm.assert_series_equal(result, s1) - result = s1.fillna(Series(())) + result = s1.fillna(Series((), dtype=object)) tm.assert_series_equal(result, s1) result = s2.fillna(s1) tm.assert_series_equal(result, s2) @@ -834,7 +834,8 @@ def test_timedelta64_nan(self): # tm.assert_series_equal(selector, expected) def test_dropna_empty(self): - s = Series([]) + s = Series([], dtype=object) + assert len(s.dropna()) == 0 s.dropna(inplace=True) assert len(s) == 0 @@ -1163,7 +1164,7 @@ def test_interpolate_corners(self, kwargs): s = Series([np.nan, np.nan]) tm.assert_series_equal(s.interpolate(**kwargs), s) - s = Series([]).interpolate() + s = Series([], dtype=object).interpolate() tm.assert_series_equal(s.interpolate(**kwargs), s) def test_interpolate_index_values(self): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 983560d68c28c..06fe64d69fb6b 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -33,7 +33,7 @@ def test_logical_operators_bool_dtype_with_empty(self): s_tft = Series([True, False, True], index=index) s_fff = Series([False, False, False], index=index) - s_empty = Series([]) + s_empty = Series([], dtype=object) res = s_tft & s_empty expected = s_fff @@ -408,11 +408,13 @@ def test_logical_ops_label_based(self): # filling # vs empty - result = a & Series([]) + empty = Series([], dtype=object) + + result = a & empty.copy() expected = Series([False, False, False], list("bca")) tm.assert_series_equal(result, expected) - result = a | Series([]) + result = a | empty.copy() expected = Series([True, False, True], list("bca")) tm.assert_series_equal(result, expected) @@ -428,7 +430,7 @@ def test_logical_ops_label_based(self): # identity # we would like s[s|e] == s to hold for any e, whether empty or not for e in [ - Series([]), + empty.copy(), Series([1], ["z"]), Series(np.nan, b.index), Series(np.nan, 
a.index), @@ -797,12 +799,12 @@ def test_ops_datetimelike_align(self): tm.assert_series_equal(result, expected) def test_operators_corner(self, datetime_series): - empty = Series([], index=Index([])) + empty = Series([], index=Index([]), dtype=np.float64) result = datetime_series + empty assert np.isnan(result).all() - result = empty + Series([], index=Index([])) + result = empty + empty.copy() assert len(result) == 0 # TODO: this returned NotImplemented earlier, what to do? diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 1a4a3f523cbbe..4eb275d63e878 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -67,7 +67,7 @@ def test_quantile_multi(self, datetime_series): result = datetime_series.quantile([]) expected = pd.Series( - [], name=datetime_series.name, index=Index([], dtype=float) + [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64" ) tm.assert_series_equal(result, expected) @@ -104,7 +104,8 @@ def test_quantile_nan(self): assert result == expected # all nan/empty - cases = [Series([]), Series([np.nan, np.nan])] + s1 = Series([], dtype=object) + cases = [s1, Series([np.nan, np.nan])] for s in cases: res = s.quantile(0.5) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 8018ecf03960c..4125b5816422a 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -245,7 +245,10 @@ def test_replace_with_empty_dictlike(self): # GH 15289 s = pd.Series(list("abcd")) tm.assert_series_equal(s, s.replace(dict())) - tm.assert_series_equal(s, s.replace(pd.Series([]))) + + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty_series = pd.Series([]) + tm.assert_series_equal(s, s.replace(empty_series)) def test_replace_string_with_number(self): # GH 15743 diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 
f1661ad034e4c..b687179f176c3 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -62,7 +62,7 @@ def test_name_printing(self): s.name = None assert "Name:" not in repr(s) - s = Series(index=date_range("20010101", "20020101"), name="test") + s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object) assert "Name: test" in repr(s) def test_repr(self, datetime_series, string_series, object_series): @@ -75,7 +75,7 @@ def test_repr(self, datetime_series, string_series, object_series): str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1))) # empty - str(Series()) + str(Series(dtype=object)) # with NaNs string_series[5:7] = np.NaN diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 8039b133cae10..fd3445e271699 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -157,8 +157,8 @@ def test_sort_index_multiindex(self, level): def test_sort_index_kind(self): # GH #14444 & #13589: Add support for sort algo choosing - series = Series(index=[3, 2, 1, 4, 3]) - expected_series = Series(index=[1, 2, 3, 3, 4]) + series = Series(index=[3, 2, 1, 4, 3], dtype=object) + expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object) index_sorted_series = series.sort_index(kind="mergesort") tm.assert_series_equal(expected_series, index_sorted_series) @@ -170,13 +170,14 @@ def test_sort_index_kind(self): tm.assert_series_equal(expected_series, index_sorted_series) def test_sort_index_na_position(self): - series = Series(index=[3, 2, 1, 4, 3, np.nan]) + series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object) + expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object) - expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4]) index_sorted_series = series.sort_index(na_position="first") tm.assert_series_equal(expected_series_first, index_sorted_series) - expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan]) + 
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object) + index_sorted_series = series.sort_index(na_position="last") tm.assert_series_equal(expected_series_last, index_sorted_series) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 6b82f890e974b..5e2d23a70e5be 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -32,4 +32,6 @@ def test_subclass_unstack(self): tm.assert_frame_equal(res, exp) def test_subclass_empty_repr(self): - assert "SubclassedSeries" in repr(tm.SubclassedSeries()) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + sub_series = tm.SubclassedSeries() + assert "SubclassedSeries" in repr(sub_series) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 1587ae5eb7d07..6d00b9f2b09df 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -346,10 +346,9 @@ def test_asfreq(self): def test_asfreq_datetimeindex_empty_series(self): # GH 14320 - expected = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"])).asfreq("H") - result = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]), data=[3]).asfreq( - "H" - ) + index = pd.DatetimeIndex(["2016-09-29 11:00"]) + expected = Series(index=index, dtype=object).asfreq("H") + result = Series([3], index=index.copy()).asfreq("H") tm.assert_index_equal(expected.index, result.index) def test_pct_change(self, datetime_series): @@ -410,7 +409,7 @@ def test_pct_change_periods_freq( ) tm.assert_series_equal(rs_freq, rs_periods) - empty_ts = Series(index=datetime_series.index) + empty_ts = Series(index=datetime_series.index, dtype=object) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_series_equal(rs_freq, rs_periods) @@ -457,12 +456,12 @@ def test_first_last_valid(self, 
datetime_series): assert ts.last_valid_index() is None assert ts.first_valid_index() is None - ser = Series([], index=[]) + ser = Series([], index=[], dtype=object) assert ser.last_valid_index() is None assert ser.first_valid_index() is None # GH12800 - empty = Series() + empty = Series(dtype=object) assert empty.last_valid_index() is None assert empty.first_valid_index() is None diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index c03101265f7e7..5e255e7cd5dcd 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -89,7 +89,7 @@ def test_series_tz_localize_nonexistent(self, tz, method, exp): @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) def test_series_tz_localize_empty(self, tzstr): # GH#2248 - ser = Series() + ser = Series(dtype=object) ser2 = ser.tz_localize("utc") assert ser2.index.tz == pytz.utc diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 02b50d84c6eca..e0e4beffe113a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -812,7 +812,7 @@ def test_no_cast(self): result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) - @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) def test_empty(self, empty): # see gh-16991 vals = Index(["a", "b"]) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index d515a015cdbec..5c9a119400319 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -589,7 +589,7 @@ def test_value_counts_bins(self, index_or_series): tm.assert_numpy_array_equal(s.unique(), exp) assert s.nunique() == 3 - s = klass({}) + s = klass({}) if klass is dict else klass({}, dtype=object) expected = Series([], dtype=np.int64) tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) # returned dtype differs depending on original diff 
--git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 44829423be1bb..204cdee2d9e1f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1538,7 +1538,7 @@ def test_frame_dict_constructor_empty_series(self): s2 = Series( [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) ) - s3 = Series() + s3 = Series(dtype=object) # it works! DataFrame({"foo": s1, "bar": s2, "baz": s3}) diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 97086f8ab1e85..6b40ff8b3fa1e 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -45,7 +45,8 @@ def test_register(obj, registrar): with ensure_removed(obj, "mine"): before = set(dir(obj)) registrar("mine")(MyAccessor) - assert obj([]).mine.prop == "item" + o = obj([]) if obj is not pd.Series else obj([], dtype=object) + assert o.mine.prop == "item" after = set(dir(obj)) assert (before ^ after) == {"mine"} assert "mine" in obj._accessors @@ -88,4 +89,4 @@ def __init__(self, data): raise AttributeError("whoops") with pytest.raises(AttributeError, match="whoops"): - pd.Series([]).bad + pd.Series([], dtype=object).bad diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index df3c7fe9c9936..ebbdbd6c29842 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -207,7 +207,7 @@ def test_multiindex_objects(): Series(["a", np.nan, "c"]), Series(["a", None, "c"]), Series([True, False, True]), - Series(), + Series(dtype=object), Index([1, 2, 3]), Index([True, False, True]), DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), diff --git a/pandas/tests/window/test_moments.py b/pandas/tests/window/test_moments.py index f1c89d3c6c1b4..2c65c9e2ac82c 100644 --- a/pandas/tests/window/test_moments.py +++ b/pandas/tests/window/test_moments.py @@ -108,7 +108,7 @@ def test_cmov_window_corner(self): assert np.isnan(result).all() # 
empty - vals = pd.Series([]) + vals = pd.Series([], dtype=object) result = vals.rolling(5, center=True, win_type="boxcar").mean() assert len(result) == 0 @@ -674,7 +674,7 @@ def f(x): self._check_moment_func(np.mean, name="apply", func=f, raw=raw) - expected = Series([]) + expected = Series([], dtype="float64") result = expected.rolling(10).apply(lambda x: x.mean(), raw=raw) tm.assert_series_equal(result, expected) @@ -1193,8 +1193,10 @@ def _check_ew(self, name=None, preserve_nan=False): assert not result[11:].isna().any() # check series of length 0 - result = getattr(Series().ewm(com=50, min_periods=min_periods), name)() - tm.assert_series_equal(result, Series()) + result = getattr( + Series(dtype=object).ewm(com=50, min_periods=min_periods), name + )() + tm.assert_series_equal(result, Series(dtype="float64")) # check series of length 1 result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() @@ -1214,7 +1216,7 @@ def _check_ew(self, name=None, preserve_nan=False): def _create_consistency_data(): def create_series(): return [ - Series(), + Series(dtype=object), Series([np.nan]), Series([np.nan, np.nan]), Series([3.0]), @@ -1989,8 +1991,9 @@ def func(A, B, com, **kwargs): assert not np.isnan(result.values[11:]).any() # check series of length 0 - result = func(Series([]), Series([]), 50, min_periods=min_periods) - tm.assert_series_equal(result, Series([])) + empty = Series([], dtype=np.float64) + result = func(empty, empty, 50, min_periods=min_periods) + tm.assert_series_equal(result, empty) # check series of length 1 result = func(Series([1.0]), Series([1.0]), 50, min_periods=min_periods) @@ -2190,7 +2193,7 @@ def test_rolling_functions_window_non_shrinkage_binary(self): def test_moment_functions_zero_length(self): # GH 8056 - s = Series() + s = Series(dtype=np.float64) s_expected = s df1 = DataFrame() df1_expected = df1 @@ -2409,7 +2412,7 @@ def expanding_mean(x, min_periods=1): # here to make this pass self._check_expanding(expanding_mean, 
np.mean, preserve_nan=False) - ser = Series([]) + ser = Series([], dtype=np.float64) tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean(), raw=raw)) # GH 8080