Skip to content

[#16737] Index type for Series with empty data #32053

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,15 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0.

pd.Series([np.nan]).sum()

pd.Series([], dtype="float64").sum()
pd.Series([], dtype="float64", index=[]).sum()

The product of an empty or all-NA Series or column of a DataFrame is 1.

.. ipython:: python

pd.Series([np.nan]).prod()

pd.Series([], dtype="float64").prod()
pd.Series([], dtype="float64", index=[]).prod()


NA values in GroupBy
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ Deprecations
arguments (:issue:`27573`).

- :func:`pandas.api.types.is_categorical` is deprecated and will be removed in a future version; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
- ``Series([])`` will emit a ``DeprecationWarning`` regarding its index. The default index type will change from :class:`RangeIndex` to :class:`Index` in a future version, matching the behaviour of ``Series()`` (:issue:`16737`)

.. ---------------------------------------------------------------------------

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@
)
from pandas.core.dtypes.missing import isna, na_value_for_dtype

from pandas.core.construction import array, extract_array
from pandas.core.construction import (
array,
create_series_with_explicit_index,
extract_array,
)
from pandas.core.indexers import validate_indices

if TYPE_CHECKING:
Expand Down Expand Up @@ -835,7 +839,7 @@ def mode(values, dropna: bool = True) -> "Series":
warn(f"Unable to sort modes: {err}")

result = _reconstruct_data(result, original.dtype, original)
return Series(result)
return create_series_with_explicit_index(result)


def rank(
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
)
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.construction import (
create_series_with_explicit_dtype,
create_series_with_explicit_index,
)

if TYPE_CHECKING:
from pandas import DataFrame, Series, Index
Expand Down Expand Up @@ -202,15 +205,15 @@ def apply_empty_result(self):

if not should_reduce:
try:
r = self.f(Series([], dtype=np.float64))
r = self.f(create_series_with_explicit_index([], dtype=np.float64))
except Exception:
pass
else:
should_reduce = not isinstance(r, Series)

if should_reduce:
if len(self.agg_axis):
r = self.f(Series([], dtype=np.float64))
r = self.f(create_series_with_explicit_index([], dtype=np.float64))
else:
r = np.nan

Expand Down
55 changes: 54 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,10 +621,63 @@ def create_series_with_explicit_dtype(
-------
Series
"""
from pandas.core.series import Series
from pandas import RangeIndex

if is_empty_data(data) and dtype is None:
dtype = dtype_if_empty

return create_series_with_explicit_index(
data=data,
index=index,
dtype=dtype,
name=name,
copy=copy,
fastpath=fastpath,
index_if_empty=RangeIndex(0), # non-breaking yet
)


def create_series_with_explicit_index(
    data: Any = None,
    index: Optional[Union[ArrayLike, "Index"]] = None,
    dtype: Optional[Dtype] = None,
    name: Optional[str] = None,
    copy: bool = False,
    fastpath: bool = False,
    index_if_empty: Optional["Index"] = None,
) -> "Series":
    """
    Helper to pass an explicit index type when instantiating a Series where
    data is list-like and empty.

    This silences a DeprecationWarning described in GitHub-16737.

    Parameters
    ----------
    data : Mirrored from Series.__init__
    index : Mirrored from Series.__init__
    dtype : Mirrored from Series.__init__
    name : Mirrored from Series.__init__
    copy : Mirrored from Series.__init__
    fastpath : Mirrored from Series.__init__
    index_if_empty : instance of (Index, RangeIndex)
        This index will be passed explicitly when Series is initialised
        with `data` being list-like, sized and empty, and no `index` is
        given. Defaults to ``Index([])``.

    Returns
    -------
    Series

    Notes
    -----
    Unsized iterables (e.g. generators) are forwarded to ``Series`` untouched,
    because their emptiness cannot be determined without consuming them.
    """
    # imported locally to avoid circular imports
    from pandas import Index, Series  # noqa: F811

    if index_if_empty is None:
        index_if_empty = Index([])

    # dicts are handled separately in Series.__init__; objects without
    # ``__len__`` (generators, iterators) are list-like but calling ``len``
    # on them would raise a TypeError, so they are not inspected here
    is_relevant_type = (
        is_list_like(data)
        and not isinstance(data, dict)
        and hasattr(data, "__len__")
    )
    if index is None and is_relevant_type and len(data) == 0:
        index = index_if_empty
    return Series(
        data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
    )
5 changes: 2 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype
from pandas.core.base import IndexOpsMixin, PandasObject
import pandas.core.common as com
from pandas.core.construction import create_series_with_explicit_index
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.frozen import FrozenList
import pandas.core.missing as missing
Expand Down Expand Up @@ -142,9 +143,7 @@ def index_arithmetic_method(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame, ABCTimedeltaIndex)):
return NotImplemented

from pandas import Series

result = op(Series(self), other)
result = op(create_series_with_explicit_index(self), other)
if isinstance(result, tuple):
return (Index(result[0]), Index(result[1]))
return Index(result)
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,18 @@ def __init__(
if index is None:
if not is_list_like(data):
data = [data]
index = ibase.default_index(len(data))

n = len(data)
if n == 0:
# gh-16737
warnings.warn(
"The default index type for empty data will be 'Index' "
"instead of 'RangeIndex' in a future version. "
"Specify an index explicitly to silence this warning.",
DeprecationWarning,
stacklevel=2,
)
index = ibase.default_index(n)
elif is_list_like(data):

# a scalar numpy array is list-like but doesn't
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

from pandas.core.algorithms import take_1d
from pandas.core.base import NoNewAttributesMixin
from pandas.core.construction import extract_array
from pandas.core.construction import create_series_with_explicit_index, extract_array

if TYPE_CHECKING:
from pandas.arrays import StringArray
Expand Down Expand Up @@ -2180,7 +2180,7 @@ def _wrap_result(
returns_string=True,
):

from pandas import Index, Series, MultiIndex
from pandas import Index, MultiIndex

# for category, we do the stuff on the categories, so blow it up
# to the full series again
Expand All @@ -2190,7 +2190,9 @@ def _wrap_result(
if use_codes and self._is_categorical:
# if self._orig is a CategoricalIndex, there is no .cat-accessor
result = take_1d(
result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value
result,
create_series_with_explicit_index(self._orig, copy=False).cat.codes,
fill_value=fill_value,
)

if not hasattr(result, "ndim") or not hasattr(result, "dtype"):
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from pandas.core import algorithms
from pandas.core.algorithms import unique
from pandas.core.arrays.datetimes import tz_to_dtype
from pandas.core.construction import create_series_with_explicit_index

# ---------------------------------------------------------------------
# types used in annotations
Expand Down Expand Up @@ -764,9 +765,10 @@ def to_datetime(
if errors == "raise":
raise
# ... otherwise, continue without the cache.
from pandas import Series

cache_array = Series([], dtype=object) # just an empty array
cache_array = create_series_with_explicit_index(
[], dtype=object
) # just an empty array
if not cache_array.empty:
result = _convert_and_box_cache(arg, cache_array)
else:
Expand Down
12 changes: 9 additions & 3 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@

from pandas.core import algorithms
from pandas.core.arrays import Categorical
from pandas.core.construction import create_series_with_explicit_index
from pandas.core.frame import DataFrame
from pandas.core.indexes.api import (
Index,
MultiIndex,
RangeIndex,
ensure_index_from_sequences,
)
from pandas.core.series import Series
from pandas.core.tools import datetimes as tools

from pandas.io.common import (
Expand Down Expand Up @@ -3494,14 +3494,20 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
if (index_col is None or index_col is False) or index_names is None:
index = Index([])
else:
data = [Series([], dtype=dtype[name]) for name in index_names]
data = [
create_series_with_explicit_index([], dtype=dtype[name])
for name in index_names
]
index = ensure_index_from_sequences(data, names=index_names)
index_col.sort()

for i, n in enumerate(index_col):
columns.pop(n - i)

col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns}
col_dict = {
col_name: create_series_with_explicit_index([], dtype=dtype[col_name])
for col_name in columns
}

return index, columns, col_dict

Expand Down
7 changes: 5 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from pandas.core.arrays import Categorical, DatetimeArray, PeriodArray
import pandas.core.common as com
from pandas.core.computation.pytables import PyTablesExpr, maybe_expression
from pandas.core.construction import create_series_with_explicit_index
from pandas.core.indexes.api import ensure_index

from pandas.io.common import stringify_path
Expand Down Expand Up @@ -3313,7 +3314,7 @@ def write_metadata(self, key: str, values: np.ndarray):
key : str
values : ndarray
"""
values = Series(values)
values = create_series_with_explicit_index(values)
self.parent.put(
self._get_metadata_path(key),
values,
Expand Down Expand Up @@ -4051,7 +4052,9 @@ def read_column(
encoding=self.encoding,
errors=self.errors,
)
return Series(_set_tz(col_values[1], a.tz), name=column)
return create_series_with_explicit_index(
_set_tz(col_values[1], a.tz), name=column
)

raise KeyError(f"column [{column}] not found in the table")

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/boolean/test_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

import pandas as pd
from pandas.core.construction import create_series_with_explicit_index


@pytest.fixture
Expand Down Expand Up @@ -31,7 +32,7 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)

for con in [pd.array, pd.Series]:
for con in [pd.array, create_series_with_explicit_index]:
a = con(values, dtype="boolean")
assert a.any() is exp_any
assert a.all() is exp_all
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/integer/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import integer_array
from pandas.core.construction import create_series_with_explicit_index


@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
Expand Down Expand Up @@ -105,7 +106,7 @@ def test_value_counts_na():

def test_value_counts_empty():
# https://github.com/pandas-dev/pandas/issues/33317
s = pd.Series([], dtype="Int64")
s = create_series_with_explicit_index([], dtype="Int64")
result = s.value_counts()
# TODO: The dtype of the index seems wrong (it's int64 for non-empty)
idx = pd.Index([], dtype="object")
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.construction import create_series_with_explicit_index
from pandas.tests.base.common import allow_na_ops


Expand Down Expand Up @@ -94,7 +95,9 @@ def test_nunique_null(null_obj, index_or_series_obj):
else:
values[0:2] = null_obj

klass = type(obj)
klass = (
create_series_with_explicit_index if isinstance(obj, pd.Series) else type(obj)
)
repeated_values = np.repeat(values, range(1, len(values) + 1))
obj = klass(repeated_values, dtype=obj.dtype)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.construction import create_series_with_explicit_index
from pandas.tests.base.common import allow_na_ops


Expand Down Expand Up @@ -180,7 +181,7 @@ def test_value_counts_bins(index_or_series):
assert s.nunique() == 3

s = klass({}) if klass is dict else klass({}, dtype=object)
expected = Series([], dtype=np.int64)
expected = create_series_with_explicit_index([], dtype=np.int64)
tm.assert_series_equal(s.value_counts(), expected, check_index_type=False)
# returned dtype differs depending on original
if isinstance(s, Index):
Expand Down
Loading