Skip to content

Commit 134b335

Browse files
committed
Merge remote-tracking branch 'upstream/master' into 32bit-ci
2 parents 8093584 + d1e29be commit 134b335

20 files changed

+142
-114
lines changed

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
// The Pythons you'd like to test against. If not provided, defaults
2727
// to the current version of Python used to run `asv`.
2828
// "pythons": ["2.7", "3.4"],
29-
"pythons": ["3.6"],
29+
"pythons": ["3.8"],
3030

3131
// The matrix of dependencies to test. Each key is the name of a
3232
// package (in PyPI) and the values are version numbers. An empty

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397
540540

541541
.. ipython:: python
542542
543-
df.describe()
543+
df.describe()
544544
545545
``__str__`` methods now call ``__repr__`` rather than vice versa
546546
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

doc/source/whatsnew/v1.1.2.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
18+
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
1819
-
19-
-
20+
2021

2122
.. ---------------------------------------------------------------------------
2223
@@ -26,7 +27,7 @@ Bug fixes
2627
~~~~~~~~~
2728
- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`)
2829
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
29-
-
30+
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
3031
-
3132

3233
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ Groupby/resample/rolling
254254
- Bug in :meth:`DataFrameGroupBy.apply` that would sometimes throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`)
255255
- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising a ``TypeError`` (:issue:`35325`)
256256
- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
257+
- Bug in :meth:`DataFrameGroupBy.apply` that would drop a :class:`CategoricalIndex` when grouped on (:issue:`35792`)
257258

258259
Reshaping
259260
^^^^^^^^^

pandas/core/groupby/generic.py

+25-65
Original file line numberDiff line numberDiff line change
@@ -1197,57 +1197,25 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
11971197
if len(keys) == 0:
11981198
return self.obj._constructor(index=keys)
11991199

1200-
key_names = self.grouper.names
1201-
12021200
# GH12824
12031201
first_not_none = next(com.not_none(*values), None)
12041202

12051203
if first_not_none is None:
1206-
# GH9684. If all values are None, then this will throw an error.
1207-
# We'd prefer it return an empty dataframe.
1204+
# GH9684 - All values are None, return an empty frame.
12081205
return self.obj._constructor()
12091206
elif isinstance(first_not_none, DataFrame):
12101207
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
12111208
else:
1212-
if len(self.grouper.groupings) > 1:
1213-
key_index = self.grouper.result_index
1214-
1215-
else:
1216-
ping = self.grouper.groupings[0]
1217-
if len(keys) == ping.ngroups:
1218-
key_index = ping.group_index
1219-
key_index.name = key_names[0]
1220-
1221-
key_lookup = Index(keys)
1222-
indexer = key_lookup.get_indexer(key_index)
1223-
1224-
# reorder the values
1225-
values = [values[i] for i in indexer]
1226-
1227-
# update due to the potential reorder
1228-
first_not_none = next(com.not_none(*values), None)
1229-
else:
1230-
1231-
key_index = Index(keys, name=key_names[0])
1232-
1233-
# don't use the key indexer
1234-
if not self.as_index:
1235-
key_index = None
1209+
key_index = self.grouper.result_index if self.as_index else None
12361210

1237-
# make Nones an empty object
1238-
if first_not_none is None:
1239-
return self.obj._constructor()
1240-
elif isinstance(first_not_none, NDFrame):
1211+
if isinstance(first_not_none, Series):
12411212

12421213
# this is to silence a DeprecationWarning
12431214
# TODO: Remove when default dtype of empty Series is object
12441215
kwargs = first_not_none._construct_axes_dict()
1245-
if isinstance(first_not_none, Series):
1246-
backup = create_series_with_explicit_dtype(
1247-
**kwargs, dtype_if_empty=object
1248-
)
1249-
else:
1250-
backup = first_not_none._constructor(**kwargs)
1216+
backup = create_series_with_explicit_dtype(
1217+
**kwargs, dtype_if_empty=object
1218+
)
12511219

12521220
values = [x if (x is not None) else backup for x in values]
12531221

@@ -1256,7 +1224,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
12561224
if isinstance(v, (np.ndarray, Index, Series)) or not self.as_index:
12571225
if isinstance(v, Series):
12581226
applied_index = self._selected_obj._get_axis(self.axis)
1259-
all_indexed_same = all_indexes_same([x.index for x in values])
1227+
all_indexed_same = all_indexes_same((x.index for x in values))
12601228
singular_series = len(values) == 1 and applied_index.nlevels == 1
12611229

12621230
# GH3596
@@ -1288,7 +1256,6 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
12881256
# GH 8467
12891257
return self._concat_objects(keys, values, not_indexed_same=True)
12901258

1291-
if self.axis == 0 and isinstance(v, ABCSeries):
12921259
# GH6124 if the list of Series have a consistent name,
12931260
# then propagate that name to the result.
12941261
index = v.index.copy()
@@ -1301,34 +1268,27 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
13011268
if len(names) == 1:
13021269
index.name = list(names)[0]
13031270

1304-
# normally use vstack as its faster than concat
1305-
# and if we have mi-columns
1306-
if (
1307-
isinstance(v.index, MultiIndex)
1308-
or key_index is None
1309-
or isinstance(key_index, MultiIndex)
1310-
):
1311-
stacked_values = np.vstack([np.asarray(v) for v in values])
1312-
result = self.obj._constructor(
1313-
stacked_values, index=key_index, columns=index
1314-
)
1315-
else:
1316-
# GH5788 instead of stacking; concat gets the
1317-
# dtypes correct
1318-
from pandas.core.reshape.concat import concat
1319-
1320-
result = concat(
1321-
values,
1322-
keys=key_index,
1323-
names=key_index.names,
1324-
axis=self.axis,
1325-
).unstack()
1326-
result.columns = index
1327-
elif isinstance(v, ABCSeries):
1271+
# Combine values
1272+
# vstack+constructor is faster than concat and handles MI-columns
13281273
stacked_values = np.vstack([np.asarray(v) for v in values])
1274+
1275+
if self.axis == 0:
1276+
index = key_index
1277+
columns = v.index.copy()
1278+
if columns.name is None:
1279+
# GH6124 - propagate name of Series when it's consistent
1280+
names = {v.name for v in values}
1281+
if len(names) == 1:
1282+
columns.name = list(names)[0]
1283+
else:
1284+
index = v.index
1285+
columns = key_index
1286+
stacked_values = stacked_values.T
1287+
13291288
result = self.obj._constructor(
1330-
stacked_values.T, index=v.index, columns=key_index
1289+
stacked_values, index=index, columns=columns
13311290
)
1291+
13321292
elif not self.as_index:
13331293
# We add grouping column below, so create a frame here
13341294
result = DataFrame(

pandas/core/indexes/api.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -297,15 +297,16 @@ def all_indexes_same(indexes):
297297
298298
Parameters
299299
----------
300-
indexes : list of Index objects
300+
indexes : iterable of Index objects
301301
302302
Returns
303303
-------
304304
bool
305305
True if all indexes contain the same elements, False otherwise.
306306
"""
307-
first = indexes[0]
308-
for index in indexes[1:]:
307+
itr = iter(indexes)
308+
first = next(itr)
309+
for index in itr:
309310
if not first.equals(index):
310311
return False
311312
return True

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,9 @@ def format(
933933

934934
return self._format_with_header(header, na_rep=na_rep)
935935

936-
def _format_with_header(self, header, na_rep="NaN") -> List[str_t]:
936+
def _format_with_header(
937+
self, header: List[str_t], na_rep: str_t = "NaN"
938+
) -> List[str_t]:
937939
from pandas.io.formats.format import format_array
938940

939941
values = self._values

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ def _format_attrs(self):
347347
attrs.append(("length", len(self)))
348348
return attrs
349349

350-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
350+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
351351
from pandas.io.formats.printing import pprint_thing
352352

353353
result = [

pandas/core/indexes/datetimelike.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -354,15 +354,20 @@ def format(
354354
"""
355355
header = []
356356
if name:
357-
fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
358-
header.append(fmt_name)
357+
header.append(
358+
ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
359+
if self.name is not None
360+
else ""
361+
)
359362

360363
if formatter is not None:
361364
return header + list(self.map(formatter))
362365

363366
return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
364367

365-
def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]:
368+
def _format_with_header(
369+
self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
370+
) -> List[str]:
366371
return header + list(
367372
self._format_native_types(na_rep=na_rep, date_format=date_format)
368373
)

pandas/core/indexes/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
948948
# Rendering Methods
949949
# __repr__ associated methods are based on MultiIndex
950950

951-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
951+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
952952
return header + list(self._format_native_types(na_rep=na_rep))
953953

954954
def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs):

pandas/core/indexes/range.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4-
from typing import Any
4+
from typing import Any, List
55
import warnings
66

77
import numpy as np
@@ -187,6 +187,15 @@ def _format_data(self, name=None):
187187
# we are formatting thru the attributes
188188
return None
189189

190+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
191+
if not len(self._range):
192+
return header
193+
first_val_str = str(self._range[0])
194+
last_val_str = str(self._range[-1])
195+
max_length = max(len(first_val_str), len(last_val_str))
196+
197+
return header + [f"{x:<{max_length}}" for x in self._range]
198+
190199
# --------------------------------------------------------------------
191200
_deprecation_message = (
192201
"RangeIndex.{} is deprecated and will be "

pandas/tests/groupby/test_apply.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -861,13 +861,14 @@ def test_apply_multi_level_name(category):
861861
b = [1, 2] * 5
862862
if category:
863863
b = pd.Categorical(b, categories=[1, 2, 3])
864+
expected_index = pd.CategoricalIndex([1, 2], categories=[1, 2, 3], name="B")
865+
else:
866+
expected_index = pd.Index([1, 2], name="B")
864867
df = pd.DataFrame(
865868
{"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
866869
).set_index(["A", "B"])
867870
result = df.groupby("B").apply(lambda x: x.sum())
868-
expected = pd.DataFrame(
869-
{"C": [20, 25], "D": [20, 25]}, index=pd.Index([1, 2], name="B")
870-
)
871+
expected = pd.DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index)
871872
tm.assert_frame_equal(result, expected)
872873
assert df.index.names == ["A", "B"]
873874

pandas/tests/indexes/common.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import gc
2-
from typing import Optional, Type
2+
from typing import Type
33

44
import numpy as np
55
import pytest
@@ -33,7 +33,7 @@
3333
class Base:
3434
""" base class for index sub-class tests """
3535

36-
_holder: Optional[Type[Index]] = None
36+
_holder: Type[Index]
3737
_compat_props = ["shape", "ndim", "size", "nbytes"]
3838

3939
def create_index(self) -> Index:
@@ -686,6 +686,12 @@ def test_format(self):
686686
expected = [str(x) for x in idx]
687687
assert idx.format() == expected
688688

689+
def test_format_empty(self):
690+
# GH35712
691+
empty_idx = self._holder([])
692+
assert empty_idx.format() == []
693+
assert empty_idx.format(name=True) == [""]
694+
689695
def test_hasnans_isnans(self, index):
690696
# GH 11343, added tests for hasnans / isnans
691697
if isinstance(index, MultiIndex):

pandas/tests/indexes/period/test_period.py

+6
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,12 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
536536
with pytest.raises(KeyError, match=msg):
537537
df.loc[key]
538538

539+
def test_format_empty(self):
540+
# GH35712
541+
empty_idx = self._holder([], freq="A")
542+
assert empty_idx.format() == []
543+
assert empty_idx.format(name=True) == [""]
544+
539545

540546
def test_maybe_convert_timedelta():
541547
pi = PeriodIndex(["2000", "2001"], freq="D")

pandas/tests/indexes/ranges/test_range.py

+12
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,14 @@ def test_cache(self):
171171
pass
172172
assert idx._cache == {}
173173

174+
idx.format()
175+
assert idx._cache == {}
176+
174177
df = pd.DataFrame({"a": range(10)}, index=idx)
175178

179+
str(df)
180+
assert idx._cache == {}
181+
176182
df.loc[50]
177183
assert idx._cache == {}
178184

@@ -515,3 +521,9 @@ def test_engineless_lookup(self):
515521
idx.get_loc("a")
516522

517523
assert "_engine" not in idx._cache
524+
525+
def test_format_empty(self):
526+
# GH35712
527+
empty_idx = self._holder(0)
528+
assert empty_idx.format() == []
529+
assert empty_idx.format(name=True) == [""]

0 commit comments

Comments
 (0)