pandas-dev · SaturnFromTitan · Feb 17, 2020 · Feb 20, 2020 · Feb 20, 2020 · Feb 20, 2020
diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
@@ -190,15 +190,15 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0.
 
    pd.Series([np.nan]).sum()
 
-   pd.Series([], dtype="float64").sum()
+   pd.Series([], dtype="float64", index=[]).sum()
 
 The product of an empty or all-NA Series or column of a DataFrame is 1.
 
 .. ipython:: python
 
    pd.Series([np.nan]).prod()
 
-   pd.Series([], dtype="float64").prod()
+   pd.Series([], dtype="float64", index=[]).prod()
 
 
 NA values in GroupBy

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -406,6 +406,7 @@ Deprecations
   arguments (:issue:`27573`).
 
 - :func:`pandas.api.types.is_categorical` is deprecated and will be removed in a future version; use `:func:pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
+- ``Series([])`` will raise a `DeprecationWarning` regarding its index. The default index type will change from :class:`RangeIndex` to :class:`Index` in a future version, matching the behaviour of ``Series()`` (:issue:`16737`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -53,7 +53,11 @@
 )
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
-from pandas.core.construction import array, extract_array
+from pandas.core.construction import (
+    array,
+    create_series_with_explicit_index,
+    extract_array,
+)
 from pandas.core.indexers import validate_indices
 
 if TYPE_CHECKING:
@@ -835,7 +839,7 @@ def mode(values, dropna: bool = True) -> "Series":
         warn(f"Unable to sort modes: {err}")
 
     result = _reconstruct_data(result, original.dtype, original)
-    return Series(result)
+    return create_series_with_explicit_index(result)
 
 
 def rank(

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -16,7 +16,10 @@
 )
 from pandas.core.dtypes.generic import ABCSeries
 
-from pandas.core.construction import create_series_with_explicit_dtype
+from pandas.core.construction import (
+    create_series_with_explicit_dtype,
+    create_series_with_explicit_index,
+)
 
 if TYPE_CHECKING:
     from pandas import DataFrame, Series, Index
@@ -202,15 +205,15 @@ def apply_empty_result(self):
 
         if not should_reduce:
             try:
-                r = self.f(Series([], dtype=np.float64))
+                r = self.f(create_series_with_explicit_index([], dtype=np.float64))
             except Exception:
                 pass
             else:
                 should_reduce = not isinstance(r, Series)
 
         if should_reduce:
             if len(self.agg_axis):
-                r = self.f(Series([], dtype=np.float64))
+                r = self.f(create_series_with_explicit_index([], dtype=np.float64))
             else:
                 r = np.nan
 

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -621,10 +621,63 @@ def create_series_with_explicit_dtype(
     -------
     Series
     """
-    from pandas.core.series import Series
+    from pandas import RangeIndex
 
     if is_empty_data(data) and dtype is None:
         dtype = dtype_if_empty
+
+    return create_series_with_explicit_index(
+        data=data,
+        index=index,
+        dtype=dtype,
+        name=name,
+        copy=copy,
+        fastpath=fastpath,
+        index_if_empty=RangeIndex(0),  # non-breaking yet
+    )
+
+
+def create_series_with_explicit_index(
+    data: Any = None,
+    index: Optional[Union[ArrayLike, "Index"]] = None,
+    dtype: Optional[Dtype] = None,
+    name: Optional[str] = None,
+    copy: bool = False,
+    fastpath: bool = False,
+    index_if_empty: Optional["Index"] = None,
+) -> "Series":
+    """
+    Helper to pass an explicit index type when instantiating an Series where
+    data is list-like and empty.
+
+    This silences a DeprecationWarning described in GitHub-16737.
+
+    Parameters
+    ----------
+    data : Mirrored from Series.__init__
+    index : Mirrored from Series.__init__
+    dtype : Mirrored from Series.__init__
+    name : Mirrored from Series.__init__
+    copy : Mirrored from Series.__init__
+    fastpath : Mirrored from Series.__init__
+    index_if_empty : instance of (Index, RangeIndex)
+        This index type will be passed explicitly when Series is initialised
+        with `data` being list-like and empty.
+
+    Returns
+    -------
+    Series
+    """
+    from pandas import Index, Series  # noqa: F811
+
+    # to avoid circular imports
+    if index_if_empty is None:
+        index_if_empty = Index([])
+
+    # dict's are handled separately in Series.__init__
+    is_relevant_type = is_list_like(data) and not isinstance(data, dict)
+    if index is None and is_relevant_type and len(data) == 0:
+        index = index_if_empty
     return Series(
         data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
     )
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -71,6 +71,7 @@
 from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype
 from pandas.core.base import IndexOpsMixin, PandasObject
 import pandas.core.common as com
+from pandas.core.construction import create_series_with_explicit_index
 from pandas.core.indexers import deprecate_ndim_indexing
 from pandas.core.indexes.frozen import FrozenList
 import pandas.core.missing as missing
@@ -142,9 +143,7 @@ def index_arithmetic_method(self, other):
         if isinstance(other, (ABCSeries, ABCDataFrame, ABCTimedeltaIndex)):
             return NotImplemented
 
-        from pandas import Series
-
-        result = op(Series(self), other)
+        result = op(create_series_with_explicit_index(self), other)
         if isinstance(result, tuple):
             return (Index(result[0]), Index(result[1]))
         return Index(result)

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -308,7 +308,18 @@ def __init__(
             if index is None:
                 if not is_list_like(data):
                     data = [data]
-                index = ibase.default_index(len(data))
+
+                n = len(data)
+                if n == 0:
+                    # gh-16737
+                    warnings.warn(
+                        "The default index type for empty data will be 'Index' "
+                        "instead of 'RangeIndex' in a future version. "
+                        "Specify an index explicitly to silence this warning.",
+                        DeprecationWarning,
+                        stacklevel=2,
+                    )
+                index = ibase.default_index(n)
             elif is_list_like(data):
 
                 # a scalar numpy array is list-like but doesn't

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -36,7 +36,7 @@
 
 from pandas.core.algorithms import take_1d
 from pandas.core.base import NoNewAttributesMixin
-from pandas.core.construction import extract_array
+from pandas.core.construction import create_series_with_explicit_index, extract_array
 
 if TYPE_CHECKING:
     from pandas.arrays import StringArray
@@ -2180,7 +2180,7 @@ def _wrap_result(
         returns_string=True,
     ):
 
-        from pandas import Index, Series, MultiIndex
+        from pandas import Index, MultiIndex
 
         # for category, we do the stuff on the categories, so blow it up
         # to the full series again
@@ -2190,7 +2190,9 @@ def _wrap_result(
         if use_codes and self._is_categorical:
             # if self._orig is a CategoricalIndex, there is no .cat-accessor
             result = take_1d(
-                result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value
+                result,
+                create_series_with_explicit_index(self._orig, copy=False).cat.codes,
+                fill_value=fill_value,
             )
 
         if not hasattr(result, "ndim") or not hasattr(result, "dtype"):

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -41,6 +41,7 @@
 from pandas.core import algorithms
 from pandas.core.algorithms import unique
 from pandas.core.arrays.datetimes import tz_to_dtype
+from pandas.core.construction import create_series_with_explicit_index
 
 # ---------------------------------------------------------------------
 # types used in annotations
@@ -764,9 +765,10 @@ def to_datetime(
             if errors == "raise":
                 raise
             # ... otherwise, continue without the cache.
-            from pandas import Series
 
-            cache_array = Series([], dtype=object)  # just an empty array
+            cache_array = create_series_with_explicit_index(
+                [], dtype=object
+            )  # just an empty array
         if not cache_array.empty:
             result = _convert_and_box_cache(arg, cache_array)
         else:

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -53,14 +53,14 @@
 
 from pandas.core import algorithms
 from pandas.core.arrays import Categorical
+from pandas.core.construction import create_series_with_explicit_index
 from pandas.core.frame import DataFrame
 from pandas.core.indexes.api import (
     Index,
     MultiIndex,
     RangeIndex,
     ensure_index_from_sequences,
 )
-from pandas.core.series import Series
 from pandas.core.tools import datetimes as tools
 
 from pandas.io.common import (
@@ -3494,14 +3494,20 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
     if (index_col is None or index_col is False) or index_names is None:
         index = Index([])
     else:
-        data = [Series([], dtype=dtype[name]) for name in index_names]
+        data = [
+            create_series_with_explicit_index([], dtype=dtype[name])
+            for name in index_names
+        ]
         index = ensure_index_from_sequences(data, names=index_names)
         index_col.sort()
 
         for i, n in enumerate(index_col):
             columns.pop(n - i)
 
-    col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns}
+    col_dict = {
+        col_name: create_series_with_explicit_index([], dtype=dtype[col_name])
+        for col_name in columns
+    }
 
     return index, columns, col_dict
 

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -51,6 +51,7 @@
 from pandas.core.arrays import Categorical, DatetimeArray, PeriodArray
 import pandas.core.common as com
 from pandas.core.computation.pytables import PyTablesExpr, maybe_expression
+from pandas.core.construction import create_series_with_explicit_index
 from pandas.core.indexes.api import ensure_index
 
 from pandas.io.common import stringify_path
@@ -3313,7 +3314,7 @@ def write_metadata(self, key: str, values: np.ndarray):
         key : str
         values : ndarray
         """
-        values = Series(values)
+        values = create_series_with_explicit_index(values)
         self.parent.put(
             self._get_metadata_path(key),
             values,
@@ -4051,7 +4052,9 @@ def read_column(
                     encoding=self.encoding,
                     errors=self.errors,
                 )
-                return Series(_set_tz(col_values[1], a.tz), name=column)
+                return create_series_with_explicit_index(
+                    _set_tz(col_values[1], a.tz), name=column
+                )
 
         raise KeyError(f"column [{column}] not found in the table")
 

diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py
@@ -2,6 +2,7 @@
 import pytest
 
 import pandas as pd
+from pandas.core.construction import create_series_with_explicit_index
 
 
 @pytest.fixture
@@ -31,7 +32,7 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
     exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
     exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
 
-    for con in [pd.array, pd.Series]:
+    for con in [pd.array, create_series_with_explicit_index]:
         a = con(values, dtype="boolean")
         assert a.any() is exp_any
         assert a.all() is exp_all

diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
@@ -4,6 +4,7 @@
 import pandas as pd
 import pandas._testing as tm
 from pandas.core.arrays import integer_array
+from pandas.core.construction import create_series_with_explicit_index
 
 
 @pytest.mark.parametrize("ufunc", [np.abs, np.sign])
@@ -105,7 +106,7 @@ def test_value_counts_na():
 
 def test_value_counts_empty():
     # https://github.com/pandas-dev/pandas/issues/33317
-    s = pd.Series([], dtype="Int64")
+    s = create_series_with_explicit_index([], dtype="Int64")
     result = s.value_counts()
     # TODO: The dtype of the index seems wrong (it's int64 for non-empty)
     idx = pd.Index([], dtype="object")

diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
@@ -7,6 +7,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.construction import create_series_with_explicit_index
 from pandas.tests.base.common import allow_na_ops
 
 
@@ -94,7 +95,9 @@ def test_nunique_null(null_obj, index_or_series_obj):
     else:
         values[0:2] = null_obj
 
-    klass = type(obj)
+    klass = (
+        create_series_with_explicit_index if isinstance(obj, pd.Series) else type(obj)
+    )
     repeated_values = np.repeat(values, range(1, len(values) + 1))
     obj = klass(repeated_values, dtype=obj.dtype)
 

diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
@@ -21,6 +21,7 @@
     TimedeltaIndex,
 )
 import pandas._testing as tm
+from pandas.core.construction import create_series_with_explicit_index
 from pandas.tests.base.common import allow_na_ops
 
 
@@ -180,7 +181,7 @@ def test_value_counts_bins(index_or_series):
     assert s.nunique() == 3
 
     s = klass({}) if klass is dict else klass({}, dtype=object)
-    expected = Series([], dtype=np.int64)
+    expected = create_series_with_explicit_index([], dtype=np.int64)
     tm.assert_series_equal(s.value_counts(), expected, check_index_type=False)
     # returned dtype differs depending on original
     if isinstance(s, Index):