Skip to content

Commit 95b4029

Browse files
added FutureWarning to empty Series without dtype and adjusted the tests so that no unnecessary warnings are thrown
1 parent debaf9a commit 95b4029

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+359
-235
lines changed

doc/source/user_guide/missing_data.rst

+2
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ Sum/prod of empties/nans
186186
The sum of an empty or all-NA Series or column of a DataFrame is 0.
187187

188188
.. ipython:: python
189+
:okwarning:
189190
190191
pd.Series([np.nan]).sum()
191192
@@ -194,6 +195,7 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0.
194195
The product of an empty or all-NA Series or column of a DataFrame is 1.
195196

196197
.. ipython:: python
198+
:okwarning:
197199
198200
pd.Series([np.nan]).prod()
199201

doc/source/whatsnew/v1.0.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ Other API changes
201201
See :ref:`units registration <whatsnew_1000.matplotlib_units>` for more.
202202
- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
203203
Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
204+
- Initialising an empty :class:`pandas.core.series.Series` without specifying a dtype now emits a ``FutureWarning``.
205+
The default dtype will change from ``float64`` to ``object`` in future releases so that it is consistent with the
206+
behaviour of :class:`pandas.core.frame.DataFrame` and :class:`pandas.core.indexes.base.Index`.
204207
-
205208

206209

@@ -243,7 +246,7 @@ Removal of prior version deprecations/changes
243246

244247
Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`).
245248
This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using
246-
matplotlib directly rather than rather than :meth:`~DataFrame.plot`.
249+
matplotlib directly rather than :meth:`~DataFrame.plot`.
247250

248251
To use pandas formatters with a matplotlib plot, specify
249252

pandas/compat/pickle_compat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def __new__(cls) -> "Series": # type: ignore
6464
stacklevel=6,
6565
)
6666

67-
return Series()
67+
return Series(dtype=object)
6868

6969

7070
class _LoadSparseFrame:

pandas/core/apply.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import abc
22
import inspect
3-
from typing import TYPE_CHECKING, Any, Dict, Iterator, Tuple, Type, Union
3+
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Type, Union
44

55
import numpy as np
66

@@ -202,15 +202,15 @@ def apply_empty_result(self):
202202

203203
if not should_reduce:
204204
try:
205-
r = self.f(Series([]))
205+
r = self.f(Series([], dtype=np.float64))
206206
except Exception:
207207
pass
208208
else:
209209
should_reduce = not isinstance(r, Series)
210210

211211
if should_reduce:
212212
if len(self.agg_axis):
213-
r = self.f(Series([]))
213+
r = self.f(Series([], dtype=np.float64))
214214
else:
215215
r = np.nan
216216

@@ -343,14 +343,21 @@ def apply_series_generator(self) -> Tuple[ResType, "Index"]:
343343
def wrap_results(
344344
self, results: ResType, res_index: "Index"
345345
) -> Union["Series", "DataFrame"]:
346+
from pandas import Series
346347

347348
# see if we can infer the results
348349
if len(results) > 0 and 0 in results and is_sequence(results[0]):
349350

350351
return self.wrap_results_for_axis(results, res_index)
351352

352353
# dict of scalars
353-
result = self.obj._constructor_sliced(results)
354+
# TODO: Remove if/else block when default dtype of Series is changed to object
355+
constructor_sliced = self.obj._constructor_sliced
356+
is_empty = isinstance(results, (list, tuple, dict)) and not results
357+
if constructor_sliced is Series and is_empty:
358+
result = constructor_sliced(results, dtype=np.float64)
359+
else:
360+
result = constructor_sliced(results)
354361
result.index = res_index
355362

356363
return result

pandas/core/base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1256,7 +1256,10 @@ def _map_values(self, mapper, na_action=None):
12561256
# possibility that they are tuples
12571257
from pandas import Series
12581258

1259-
mapper = Series(mapper)
1259+
if not mapper:
1260+
mapper = Series(mapper, dtype=np.float64)
1261+
else:
1262+
mapper = Series(mapper)
12601263

12611264
if isinstance(mapper, ABCSeries):
12621265
# Since values were input this means we came from either

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -8116,7 +8116,8 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
81168116
cols = Index([], name=self.columns.name)
81178117
if is_list_like(q):
81188118
return self._constructor([], index=q, columns=cols)
8119-
return self._constructor_sliced([], index=cols, name=q)
8119+
8120+
return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)
81208121

81218122
result = data._data.quantile(
81228123
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed

pandas/core/generic.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -6281,6 +6281,8 @@ def fillna(
62816281
2 NaN 1.0 NaN 5
62826282
3 NaN 3.0 NaN 4
62836283
"""
6284+
from pandas import Series
6285+
62846286
inplace = validate_bool_kwarg(inplace, "inplace")
62856287
value, method = validate_fillna_kwargs(value, method)
62866288

@@ -6317,8 +6319,10 @@ def fillna(
63176319
return self
63186320

63196321
if self.ndim == 1:
6320-
if isinstance(value, (dict, ABCSeries)):
6321-
from pandas import Series
6322+
if isinstance(value, dict):
6323+
dtype = object if not value else None
6324+
value = Series(value, dtype=dtype)
6325+
elif isinstance(value, ABCSeries):
63226326

63236327
value = Series(value)
63246328
elif not is_list_like(value):
@@ -7263,7 +7267,7 @@ def asof(self, where, subset=None):
72637267
if not is_series:
72647268
from pandas import Series
72657269

7266-
return Series(index=self.columns, name=where)
7270+
return Series(index=self.columns, name=where, dtype=np.float64)
72677271
return np.nan
72687272

72697273
# It's always much faster to use a *while* loop here for

pandas/core/groupby/generic.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,13 @@ def aggregate(self, func=None, *args, **kwargs):
251251
result = self._aggregate_named(func, *args, **kwargs)
252252

253253
index = Index(sorted(result), name=self.grouper.names[0])
254-
ret = Series(result, index=index)
254+
255+
# TODO: if/else can be removed as soon as default dtype
256+
# for empty series is changed to object
257+
if result:
258+
ret = Series(result, index=index)
259+
else:
260+
ret = Series(result, index=index, dtype=object)
255261

256262
if not self.as_index: # pragma: no cover
257263
print("Warning, ignoring as_index=True")
@@ -348,7 +354,7 @@ def _wrap_transformed_output(self, output, names=None):
348354
def _wrap_applied_output(self, keys, values, not_indexed_same=False):
349355
if len(keys) == 0:
350356
# GH #6265
351-
return Series([], name=self._selection_name, index=keys)
357+
return Series([], name=self._selection_name, index=keys, dtype=np.float64)
352358

353359
def _get_index() -> Index:
354360
if self.grouper.nkeys > 1:
@@ -430,7 +436,7 @@ def transform(self, func, *args, **kwargs):
430436

431437
result = concat(results).sort_index()
432438
else:
433-
result = Series()
439+
result = Series(dtype=np.float64)
434440

435441
# we will only try to coerce the result type if
436442
# we have a numeric dtype, as these are *always* udfs
@@ -1164,9 +1170,17 @@ def first_not_none(values):
11641170
if v is None:
11651171
return DataFrame()
11661172
elif isinstance(v, NDFrame):
1173+
1174+
# this is to silence a FutureWarning
1175+
# TODO: Remove when default dtype of empty Series is object
1176+
kwargs = v._construct_axes_dict()
1177+
if v._constructor is Series:
1178+
is_empty = "data" not in kwargs or not kwargs["data"]
1179+
if "dtype" not in kwargs and is_empty:
1180+
kwargs["dtype"] = object
1181+
11671182
values = [
1168-
x if x is not None else v._constructor(**v._construct_axes_dict())
1169-
for x in values
1183+
x if (x is not None) else v._constructor(**kwargs) for x in values
11701184
]
11711185

11721186
v = values[0]

pandas/core/series.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,19 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
202202
def __init__(
203203
self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
204204
):
205+
no_data = data is None or (isinstance(data, (list, tuple, dict)) and not data)
206+
if no_data and dtype is None:
207+
# Empty Series should have dtype object to be consistent
208+
# with the behaviour of DataFrame and Index
209+
warnings.warn(
210+
"The default dtype for empty Series will be 'object' instead"
211+
" of 'float64' in the next version. Specify a dtype explicitly"
212+
" to silence this warning.",
213+
FutureWarning,
214+
stacklevel=2,
215+
)
216+
# uncomment the line below when removing the FutureWarning
217+
# dtype = np.dtype(object)
205218

206219
# we are called internally, so short-circuit
207220
if fastpath:
@@ -357,7 +370,11 @@ def _init_dict(self, data, index=None, dtype=None):
357370
keys, values = [], []
358371

359372
# Input is now list-like, so rely on "standard" construction:
360-
s = Series(values, index=keys, dtype=dtype)
373+
# TODO: warning filter can be removed when default dtype for Series
374+
# is changed to object.
375+
with warnings.catch_warnings():
376+
warnings.simplefilter(action="ignore", category=FutureWarning)
377+
s = Series(values, index=keys, dtype=dtype)
361378

362379
# Now we just make sure the order is respected, if any
363380
if data and index is not None:

pandas/core/tools/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ def _maybe_cache(arg, format, cache, convert_listlike):
145145
"""
146146
from pandas import Series
147147

148-
cache_array = Series()
148+
cache_array = Series(dtype=object)
149+
149150
if cache:
150151
# Perform a quicker unique check
151152
if not should_cache(arg):

pandas/io/html.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,8 @@ def _parse_tfoot_tr(self, table):
767767

768768

769769
def _expand_elements(body):
770-
lens = Series([len(elem) for elem in body])
770+
dtype = None if body else object
771+
lens = Series([len(elem) for elem in body], dtype=dtype)
771772
lens_max = lens.max()
772773
not_max = lens[lens != lens_max]
773774

pandas/io/json/_json.py

+39-30
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
import functools
23
from io import StringIO
34
from itertools import islice
45
import os
@@ -1005,43 +1006,51 @@ class SeriesParser(Parser):
10051006

10061007
def _parse_no_numpy(self):
10071008

1008-
json = self.json
1009-
orient = self.orient
1010-
if orient == "split":
1011-
decoded = {
1012-
str(k): v
1013-
for k, v in loads(json, precise_float=self.precise_float).items()
1014-
}
1009+
data = loads(self.json, precise_float=self.precise_float)
1010+
1011+
if self.orient == "split":
1012+
is_empty = self._is_empty(data["data"])
1013+
else:
1014+
is_empty = self._is_empty(data)
1015+
dtype = object if is_empty else None
1016+
1017+
if self.orient == "split":
1018+
decoded = {str(k): v for k, v in data.items()}
10151019
self.check_keys_split(decoded)
1016-
self.obj = Series(dtype=None, **decoded)
1020+
self.obj = Series(**decoded, dtype=dtype)
10171021
else:
1018-
self.obj = Series(loads(json, precise_float=self.precise_float), dtype=None)
1022+
self.obj = Series(data, dtype=dtype)
10191023

10201024
def _parse_numpy(self):
10211025

1022-
json = self.json
1023-
orient = self.orient
1024-
if orient == "split":
1025-
decoded = loads(
1026-
json, dtype=None, numpy=True, precise_float=self.precise_float
1027-
)
1028-
decoded = {str(k): v for k, v in decoded.items()}
1026+
kwargs = {"dtype": None, "numpy": True, "precise_float": self.precise_float}
1027+
if self.orient in ["columns", "index"]:
1028+
kwargs["labelled"] = True
1029+
loads_ = functools.partial(loads, **kwargs)
1030+
data = loads_(self.json)
1031+
1032+
# this is needed to silence a FutureWarning
1033+
# TODO: Remove this when the default dtype of empty Series is changed to object
1034+
if self.orient == "split":
1035+
is_empty = self._is_empty(data["data"])
1036+
else:
1037+
is_empty = self._is_empty(data)
1038+
dtype = object if is_empty else None
1039+
1040+
if self.orient == "split":
1041+
decoded = {str(k): v for k, v in data.items()}
10291042
self.check_keys_split(decoded)
1030-
self.obj = Series(**decoded)
1031-
elif orient == "columns" or orient == "index":
1032-
self.obj = Series(
1033-
*loads(
1034-
json,
1035-
dtype=None,
1036-
numpy=True,
1037-
labelled=True,
1038-
precise_float=self.precise_float,
1039-
)
1040-
)
1043+
self.obj = Series(**decoded, dtype=dtype)
1044+
elif self.orient in ["columns", "index"]:
1045+
self.obj = Series(*data, dtype=dtype)
10411046
else:
1042-
self.obj = Series(
1043-
loads(json, dtype=None, numpy=True, precise_float=self.precise_float)
1044-
)
1047+
self.obj = Series(data, dtype=dtype)
1048+
1049+
@staticmethod
1050+
def _is_empty(data):
1051+
is_empty_np = isinstance(data, np.ndarray) and (data.size == 0)
1052+
is_empty_reg = isinstance(data, (list, tuple, dict)) and not data
1053+
return is_empty_np or is_empty_reg
10451054

10461055
def _try_convert_types(self):
10471056
if self.obj is None:

pandas/plotting/_matplotlib/boxplot.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def maybe_color_bp(self, bp):
115115

116116
def _make_plot(self):
117117
if self.subplots:
118-
self._return_obj = pd.Series()
118+
self._return_obj = pd.Series(dtype=object)
119119

120120
for i, (label, y) in enumerate(self._iter_data()):
121121
ax = self._get_ax(i)
@@ -407,7 +407,8 @@ def boxplot_frame_groupby(
407407
)
408408
axes = _flatten(axes)
409409

410-
ret = pd.Series()
410+
ret = pd.Series(dtype=object)
411+
411412
for (key, group), ax in zip(grouped, axes):
412413
d = group.boxplot(
413414
ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds

pandas/tests/arrays/categorical/test_algos.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_replace(to_replace, value, result):
7777
tm.assert_categorical_equal(cat, expected)
7878

7979

80-
@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
80+
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
8181
def test_isin_empty(empty):
8282
s = pd.Categorical(["a", "b"])
8383
expected = np.array([False, False], dtype=bool)

0 commit comments

Comments
 (0)