Skip to content

Commit 90d0f8b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into xs-fails-with-slice
2 parents 8bcb1bf + cda8284 commit 90d0f8b

File tree

18 files changed

+222
-61
lines changed

18 files changed

+222
-61
lines changed

azure-pipelines.yml

+2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml
22
trigger:
33
- master
4+
- 1.1.x
45

56
pr:
67
- master
8+
- 1.1.x
79

810
variables:
911
PYTEST_WORKERS: auto

doc/source/whatsnew/v1.1.1.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

18-
-
18+
- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
1919
-
2020
-
2121

@@ -26,6 +26,13 @@ Fixed regressions
2626
Bug fixes
2727
~~~~~~~~~
2828

29+
30+
Categorical
31+
^^^^^^^^^^^
32+
33+
- Bug in :meth:`CategoricalIndex.format` where, when stringified scalars had different lengths, the shorter string would be right-filled with spaces, so it had the same length as the longest string (:issue:`35439`)
34+
35+
2936
**Datetimelike**
3037

3138
-

pandas/_libs/missing.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,10 @@ def isnaobj_old(arr: ndarray) -> ndarray:
155155
result = np.zeros(n, dtype=np.uint8)
156156
for i in range(n):
157157
val = arr[i]
158-
result[i] = checknull(val) or val == INF or val == NEGINF
158+
result[i] = (
159+
checknull(val)
160+
or util.is_float_object(val) and (val == INF or val == NEGINF)
161+
)
159162
return result.view(np.bool_)
160163

161164

pandas/_libs/tslibs/vectorized.pyx

+24-33
Original file line numberDiff line numberDiff line change
@@ -211,49 +211,40 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None):
211211
int reso = RESO_DAY, curr_reso
212212
ndarray[int64_t] trans
213213
int64_t[:] deltas
214-
Py_ssize_t[:] pos
215-
int64_t local_val, delta
214+
intp_t[:] pos
215+
int64_t local_val, delta = NPY_NAT
216+
bint use_utc = False, use_tzlocal = False, use_fixed = False
216217

217218
if is_utc(tz) or tz is None:
218-
for i in range(n):
219-
if stamps[i] == NPY_NAT:
220-
continue
221-
dt64_to_dtstruct(stamps[i], &dts)
222-
curr_reso = _reso_stamp(&dts)
223-
if curr_reso < reso:
224-
reso = curr_reso
219+
use_utc = True
225220
elif is_tzlocal(tz):
226-
for i in range(n):
227-
if stamps[i] == NPY_NAT:
228-
continue
229-
local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
230-
dt64_to_dtstruct(local_val, &dts)
231-
curr_reso = _reso_stamp(&dts)
232-
if curr_reso < reso:
233-
reso = curr_reso
221+
use_tzlocal = True
234222
else:
235-
# Adjust datetime64 timestamp, recompute datetimestruct
236223
trans, deltas, typ = get_dst_info(tz)
237-
238224
if typ not in ["pytz", "dateutil"]:
239225
# static/fixed; in this case we know that len(deltas) == 1
226+
use_fixed = True
240227
delta = deltas[0]
241-
for i in range(n):
242-
if stamps[i] == NPY_NAT:
243-
continue
244-
dt64_to_dtstruct(stamps[i] + delta, &dts)
245-
curr_reso = _reso_stamp(&dts)
246-
if curr_reso < reso:
247-
reso = curr_reso
248228
else:
249229
pos = trans.searchsorted(stamps, side="right") - 1
250-
for i in range(n):
251-
if stamps[i] == NPY_NAT:
252-
continue
253-
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
254-
curr_reso = _reso_stamp(&dts)
255-
if curr_reso < reso:
256-
reso = curr_reso
230+
231+
for i in range(n):
232+
if stamps[i] == NPY_NAT:
233+
continue
234+
235+
if use_utc:
236+
local_val = stamps[i]
237+
elif use_tzlocal:
238+
local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
239+
elif use_fixed:
240+
local_val = stamps[i] + delta
241+
else:
242+
local_val = stamps[i] + deltas[pos[i]]
243+
244+
dt64_to_dtstruct(local_val, &dts)
245+
curr_reso = _reso_stamp(&dts)
246+
if curr_reso < reso:
247+
reso = curr_reso
257248

258249
return Resolution(reso)
259250

pandas/core/indexes/base.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -4252,16 +4252,15 @@ def equals(self, other: Any) -> bool:
42524252
if not isinstance(other, Index):
42534253
return False
42544254

4255-
if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
4256-
# if other is not object, use other's logic for coercion
4257-
return other.equals(self)
4258-
4259-
if isinstance(other, ABCMultiIndex):
4260-
# d-level MultiIndex can equal d-tuple Index
4261-
return other.equals(self)
4262-
4263-
if is_extension_array_dtype(other.dtype):
4264-
# All EA-backed Index subclasses override equals
4255+
# If other is a subclass of self and defines its own equals method, we
4256+
# dispatch to the subclass method. For instance for a MultiIndex,
4257+
# a d-level MultiIndex can equal d-tuple Index.
4258+
# Note: All EA-backed Index subclasses override equals
4259+
if (
4260+
isinstance(other, type(self))
4261+
and type(other) is not type(self)
4262+
and other.equals is not self.equals
4263+
):
42654264
return other.equals(self)
42664265

42674266
return array_equivalent(self._values, other._values)

pandas/core/indexes/category.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
pandas_dtype,
2121
)
2222
from pandas.core.dtypes.dtypes import CategoricalDtype
23-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
23+
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
2424

2525
from pandas.core import accessor
2626
from pandas.core.algorithms import take_1d
@@ -348,12 +348,12 @@ def _format_attrs(self):
348348
return attrs
349349

350350
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
351-
from pandas.io.formats.format import format_array
351+
from pandas.io.formats.printing import pprint_thing
352352

353-
formatted_values = format_array(
354-
self._values, formatter=None, na_rep=na_rep, justify="left"
355-
)
356-
result = ibase.trim_front(formatted_values)
353+
result = [
354+
pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep
355+
for x in self._values
356+
]
357357
return header + result
358358

359359
# --------------------------------------------------------------------

pandas/core/indexes/range.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4-
from typing import Any, List, Optional
4+
from typing import Any, Optional
55
import warnings
66

77
import numpy as np
@@ -33,8 +33,6 @@
3333
from pandas.core.indexes.numeric import Int64Index
3434
from pandas.core.ops.common import unpack_zerodim_and_defer
3535

36-
from pandas.io.formats.printing import pprint_thing
37-
3836
_empty_range = range(0)
3937

4038

@@ -197,9 +195,6 @@ def _format_data(self, name=None):
197195
# we are formatting thru the attributes
198196
return None
199197

200-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
201-
return header + [pprint_thing(x) for x in self._range]
202-
203198
# --------------------------------------------------------------------
204199
_deprecation_message = (
205200
"RangeIndex.{} is deprecated and will be "

pandas/core/indexing.py

+11
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ def loc(self) -> "_LocIndexer":
255255
256256
- A boolean array of the same length as the axis being sliced,
257257
e.g. ``[True, False, True]``.
258+
- An alignable boolean Series. The index of the key will be aligned before
259+
masking.
258260
- A ``callable`` function with one argument (the calling Series or
259261
DataFrame) and that returns valid output for indexing (one of the above)
260262
@@ -264,6 +266,8 @@ def loc(self) -> "_LocIndexer":
264266
------
265267
KeyError
266268
If any items are not found.
269+
IndexingError
270+
If an indexed key is passed and its index is unalignable to the frame index.
267271
268272
See Also
269273
--------
@@ -319,6 +323,13 @@ def loc(self) -> "_LocIndexer":
319323
max_speed shield
320324
sidewinder 7 8
321325
326+
Alignable boolean Series:
327+
328+
>>> df.loc[pd.Series([False, True, False],
329+
... index=['viper', 'sidewinder', 'cobra'])]
330+
max_speed shield
331+
sidewinder 7 8
332+
322333
Conditional that returns a boolean Series
323334
324335
>>> df.loc[df['shield'] > 6]

pandas/tests/groupby/test_apply.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
from datetime import date, datetime
22
from io import StringIO
33

44
import numpy as np
@@ -1014,3 +1014,33 @@ def test_apply_with_timezones_aware():
10141014
result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy())
10151015

10161016
tm.assert_frame_equal(result1, result2)
1017+
1018+
1019+
def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp():
1020+
# GH 29617
1021+
1022+
df = pd.DataFrame(
1023+
{
1024+
"A": ["a", "a", "a", "b"],
1025+
"B": [
1026+
date(2020, 1, 10),
1027+
date(2020, 1, 10),
1028+
date(2020, 2, 10),
1029+
date(2020, 2, 10),
1030+
],
1031+
"C": [1, 2, 3, 4],
1032+
},
1033+
index=pd.Index([100, 101, 102, 103], name="idx"),
1034+
)
1035+
1036+
grp = df.groupby(["A", "B"])
1037+
result = grp.apply(lambda x: x.head(1))
1038+
1039+
expected = df.iloc[[0, 2, 3]]
1040+
expected = expected.reset_index()
1041+
expected.index = pd.MultiIndex.from_frame(expected[["A", "B", "idx"]])
1042+
expected = expected.drop(columns="idx")
1043+
1044+
tm.assert_frame_equal(result, expected)
1045+
for val in result.index.levels[1]:
1046+
assert type(val) is date

pandas/tests/groupby/test_function.py

+62
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,68 @@ def test_frame_describe_unstacked_format():
992992
tm.assert_frame_equal(result, expected)
993993

994994

995+
@pytest.mark.filterwarnings(
996+
"ignore:"
997+
"indexing past lexsort depth may impact performance:"
998+
"pandas.errors.PerformanceWarning"
999+
)
1000+
@pytest.mark.parametrize("as_index", [True, False])
1001+
def test_describe_with_duplicate_output_column_names(as_index):
1002+
# GH 35314
1003+
df = pd.DataFrame(
1004+
{
1005+
"a": [99, 99, 99, 88, 88, 88],
1006+
"b": [1, 2, 3, 4, 5, 6],
1007+
"c": [10, 20, 30, 40, 50, 60],
1008+
},
1009+
columns=["a", "b", "b"],
1010+
)
1011+
1012+
expected = (
1013+
pd.DataFrame.from_records(
1014+
[
1015+
("a", "count", 3.0, 3.0),
1016+
("a", "mean", 88.0, 99.0),
1017+
("a", "std", 0.0, 0.0),
1018+
("a", "min", 88.0, 99.0),
1019+
("a", "25%", 88.0, 99.0),
1020+
("a", "50%", 88.0, 99.0),
1021+
("a", "75%", 88.0, 99.0),
1022+
("a", "max", 88.0, 99.0),
1023+
("b", "count", 3.0, 3.0),
1024+
("b", "mean", 5.0, 2.0),
1025+
("b", "std", 1.0, 1.0),
1026+
("b", "min", 4.0, 1.0),
1027+
("b", "25%", 4.5, 1.5),
1028+
("b", "50%", 5.0, 2.0),
1029+
("b", "75%", 5.5, 2.5),
1030+
("b", "max", 6.0, 3.0),
1031+
("b", "count", 3.0, 3.0),
1032+
("b", "mean", 5.0, 2.0),
1033+
("b", "std", 1.0, 1.0),
1034+
("b", "min", 4.0, 1.0),
1035+
("b", "25%", 4.5, 1.5),
1036+
("b", "50%", 5.0, 2.0),
1037+
("b", "75%", 5.5, 2.5),
1038+
("b", "max", 6.0, 3.0),
1039+
],
1040+
)
1041+
.set_index([0, 1])
1042+
.T
1043+
)
1044+
expected.columns.names = [None, None]
1045+
expected.index = pd.Index([88, 99], name="a")
1046+
1047+
if as_index:
1048+
expected = expected.drop(columns=["a"], level=0)
1049+
else:
1050+
expected = expected.reset_index(drop=True)
1051+
1052+
result = df.groupby("a", as_index=as_index).describe()
1053+
1054+
tm.assert_frame_equal(result, expected)
1055+
1056+
9951057
def test_groupby_mean_no_overflow():
9961058
# Regression test for (#22487)
9971059
df = pd.DataFrame(

pandas/tests/groupby/test_groupby.py

+14
Original file line numberDiff line numberDiff line change
@@ -2055,3 +2055,17 @@ def test_groups_repr_truncates(max_seq_items, expected):
20552055

20562056
result = df.groupby(np.array(df.a)).groups.__repr__()
20572057
assert result == expected
2058+
2059+
2060+
def test_group_on_two_row_multiindex_returns_one_tuple_key():
2061+
# GH 18451
2062+
df = pd.DataFrame([{"a": 1, "b": 2, "c": 99}, {"a": 1, "b": 2, "c": 88}])
2063+
df = df.set_index(["a", "b"])
2064+
2065+
grp = df.groupby(["a", "b"])
2066+
result = grp.indices
2067+
expected = {(1, 2): np.array([0, 1], dtype=np.int64)}
2068+
2069+
assert len(result) == 1
2070+
key = (1, 2)
2071+
assert (result[key] == expected[key]).all()

pandas/tests/indexes/categorical/test_category.py

+6
Original file line numberDiff line numberDiff line change
@@ -478,3 +478,9 @@ def test_reindex_base(self):
478478
def test_map_str(self):
479479
# See test_map.py
480480
pass
481+
482+
def test_format_different_scalar_lengths(self):
483+
# GH35439
484+
idx = CategoricalIndex(["aaaaaaaaa", "b"])
485+
expected = ["aaaaaaaaa", "b"]
486+
assert idx.format() == expected

pandas/tests/indexes/common.py

+6
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,12 @@ def test_equals_op(self):
642642
tm.assert_numpy_array_equal(index_a == item, expected3)
643643
tm.assert_series_equal(series_a == item, Series(expected3))
644644

645+
def test_format(self):
646+
# GH35439
647+
idx = self.create_index()
648+
expected = [str(x) for x in idx]
649+
assert idx.format() == expected
650+
645651
def test_hasnans_isnans(self, index):
646652
# GH 11343, added tests for hasnans / isnans
647653
if isinstance(index, MultiIndex):

pandas/tests/indexes/datetimes/test_datetimelike.py

+6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ def index(self, request):
2020
def create_index(self) -> DatetimeIndex:
2121
return date_range("20130101", periods=5)
2222

23+
def test_format(self):
24+
# GH35439
25+
idx = self.create_index()
26+
expected = [f"{x:%Y-%m-%d}" for x in idx]
27+
assert idx.format() == expected
28+
2329
def test_shift(self):
2430
pass # handled in test_ops
2531

0 commit comments

Comments
 (0)