Skip to content

Commit be11fc1

Browse files
authored
Merge pull request #204 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 7beb0f9 + 3f67dc3 commit be11fc1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+943
-522
lines changed

asv_bench/benchmarks/frame_methods.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,9 @@ class Rank:
652652
]
653653

654654
def setup(self, dtype):
655-
self.df = DataFrame(np.random.randn(10000, 10), columns=range(10), dtype=dtype)
655+
self.df = DataFrame(
656+
np.random.randn(10000, 10).astype(dtype), columns=range(10), dtype=dtype
657+
)
656658

657659
def time_rank(self, dtype):
658660
self.df.rank()

asv_bench/benchmarks/plotting.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import importlib
2+
import sys
3+
14
import matplotlib
25
import numpy as np
6+
import pkg_resources
37

48
from pandas import (
59
DataFrame,
@@ -13,6 +17,8 @@
1317
except ImportError:
1418
from pandas.tools.plotting import andrews_curves
1519

20+
from pandas.plotting._core import _get_plot_backend
21+
1622
matplotlib.use("Agg")
1723

1824

@@ -99,4 +105,28 @@ def time_plot_andrews_curves(self):
99105
andrews_curves(self.df, "Name")
100106

101107

108+
class BackendLoading:
109+
repeat = 1
110+
number = 1
111+
warmup_time = 0
112+
113+
def setup(self):
114+
dist = pkg_resources.get_distribution("pandas")
115+
spec = importlib.machinery.ModuleSpec("my_backend", None)
116+
mod = importlib.util.module_from_spec(spec)
117+
mod.plot = lambda *args, **kwargs: 1
118+
119+
backends = pkg_resources.get_entry_map("pandas")
120+
my_entrypoint = pkg_resources.EntryPoint(
121+
"pandas_plotting_backend", mod.__name__, dist=dist
122+
)
123+
backends["pandas_plotting_backends"][mod.__name__] = my_entrypoint
124+
for i in range(10):
125+
backends["pandas_plotting_backends"][str(i)] = my_entrypoint
126+
sys.modules["my_backend"] = mod
127+
128+
def time_get_plot_backend(self):
129+
_get_plot_backend("my_backend")
130+
131+
102132
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/reference/style.rst

+1
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,6 @@ Style export and import
6767
Styler.render
6868
Styler.export
6969
Styler.use
70+
Styler.to_html
7071
Styler.to_excel
7172
Styler.to_latex

doc/source/user_guide/options.rst

+2
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,8 @@ styler.sparse.index True "Sparsify" MultiIndex displ
487487
elements in outer levels within groups).
488488
styler.sparse.columns True "Sparsify" MultiIndex display for columns
489489
in Styler output.
490+
styler.render.max_elements 262144 Maximum number of datapoints that Styler will render,
491+
trimming either rows, columns or both to fit.
490492
======================================= ============ ==================================
491493

492494

doc/source/whatsnew/v1.2.5.rst

+2
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ Fixed regressions
1818
- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
1919
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
2020
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are a NumPy float array (:issue:`40371`)
21+
- Regression in :class:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
2122

2223
.. ---------------------------------------------------------------------------
2324
25+
2426
.. _whatsnew_125.bug_fixes:
2527

2628
Bug fixes

doc/source/whatsnew/v1.3.0.rst

+90-83
Large diffs are not rendered by default.

pandas/_libs/index.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ cdef class IndexEngine:
106106

107107
try:
108108
return self.mapping.get_item(val)
109-
except (TypeError, ValueError):
109+
except (TypeError, ValueError, OverflowError):
110+
# GH#41775 OverflowError e.g. if we are uint64 and val is -1
110111
raise KeyError(val)
111112

112113
cdef inline _get_loc_duplicates(self, object val):

pandas/_libs/intervaltree.pxi.in

+39-29
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ cdef class IntervalTree(IntervalMixin):
3131
we are emulating the IndexEngine interface
3232
"""
3333
cdef readonly:
34-
object left, right, root, dtype
34+
ndarray left, right
35+
IntervalNode root
36+
object dtype
3537
str closed
3638
object _is_overlapping, _left_sorter, _right_sorter
3739

@@ -203,6 +205,41 @@ cdef sort_values_and_indices(all_values, all_indices, subset):
203205
# Nodes
204206
# ----------------------------------------------------------------------
205207

208+
@cython.internal
209+
cdef class IntervalNode:
210+
cdef readonly:
211+
int64_t n_elements, n_center, leaf_size
212+
bint is_leaf_node
213+
214+
def __repr__(self) -> str:
215+
if self.is_leaf_node:
216+
return (
217+
f"<{type(self).__name__}: {self.n_elements} elements (terminal)>"
218+
)
219+
else:
220+
n_left = self.left_node.n_elements
221+
n_right = self.right_node.n_elements
222+
n_center = self.n_elements - n_left - n_right
223+
return (
224+
f"<{type(self).__name__}: "
225+
f"pivot {self.pivot}, {self.n_elements} elements "
226+
f"({n_left} left, {n_right} right, {n_center} overlapping)>"
227+
)
228+
229+
def counts(self):
230+
"""
231+
Inspect counts on this node
232+
useful for debugging purposes
233+
"""
234+
if self.is_leaf_node:
235+
return self.n_elements
236+
else:
237+
m = len(self.center_left_values)
238+
l = self.left_node.counts()
239+
r = self.right_node.counts()
240+
return (m, (l, r))
241+
242+
206243
# we need specialized nodes and leaves to optimize for different dtype and
207244
# closed values
208245

@@ -240,7 +277,7 @@ NODE_CLASSES = {}
240277

241278

242279
@cython.internal
243-
cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
280+
cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode(IntervalNode):
244281
"""Non-terminal node for an IntervalTree
245282

246283
Categorizes intervals by those that fall to the left, those that fall to
@@ -252,8 +289,6 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
252289
int64_t[:] center_left_indices, center_right_indices, indices
253290
{{dtype}}_t min_left, max_right
254291
{{dtype}}_t pivot
255-
int64_t n_elements, n_center, leaf_size
256-
bint is_leaf_node
257292

258293
def __init__(self,
259294
ndarray[{{dtype}}_t, ndim=1] left,
@@ -381,31 +416,6 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
381416
else:
382417
result.extend(self.center_left_indices)
383418

384-
def __repr__(self) -> str:
385-
if self.is_leaf_node:
386-
return ('<{{dtype_title}}Closed{{closed_title}}IntervalNode: '
387-
'%s elements (terminal)>' % self.n_elements)
388-
else:
389-
n_left = self.left_node.n_elements
390-
n_right = self.right_node.n_elements
391-
n_center = self.n_elements - n_left - n_right
392-
return ('<{{dtype_title}}Closed{{closed_title}}IntervalNode: '
393-
'pivot %s, %s elements (%s left, %s right, %s '
394-
'overlapping)>' % (self.pivot, self.n_elements,
395-
n_left, n_right, n_center))
396-
397-
def counts(self):
398-
"""
399-
Inspect counts on this node
400-
useful for debugging purposes
401-
"""
402-
if self.is_leaf_node:
403-
return self.n_elements
404-
else:
405-
m = len(self.center_left_values)
406-
l = self.left_node.counts()
407-
r = self.right_node.counts()
408-
return (m, (l, r))
409419

410420
NODE_CLASSES['{{dtype}}',
411421
'{{closed}}'] = {{dtype_title}}Closed{{closed_title}}IntervalNode

pandas/_libs/lib.pyi

+6-5
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def maybe_convert_objects(
7373
convert_datetime: Literal[False] = ...,
7474
convert_timedelta: bool = ...,
7575
convert_period: Literal[False] = ...,
76+
convert_interval: Literal[False] = ...,
7677
convert_to_nullable_integer: Literal[False] = ...,
7778
dtype_if_all_nat: DtypeObj | None = ...,
7879
) -> np.ndarray: ...
@@ -86,6 +87,7 @@ def maybe_convert_objects(
8687
convert_datetime: bool = ...,
8788
convert_timedelta: bool = ...,
8889
convert_period: bool = ...,
90+
convert_interval: bool = ...,
8991
convert_to_nullable_integer: Literal[True] = ...,
9092
dtype_if_all_nat: DtypeObj | None = ...,
9193
) -> ArrayLike: ...
@@ -99,6 +101,7 @@ def maybe_convert_objects(
99101
convert_datetime: Literal[True] = ...,
100102
convert_timedelta: bool = ...,
101103
convert_period: bool = ...,
104+
convert_interval: bool = ...,
102105
convert_to_nullable_integer: bool = ...,
103106
dtype_if_all_nat: DtypeObj | None = ...,
104107
) -> ArrayLike: ...
@@ -112,6 +115,7 @@ def maybe_convert_objects(
112115
convert_datetime: bool = ...,
113116
convert_timedelta: bool = ...,
114117
convert_period: Literal[True] = ...,
118+
convert_interval: bool = ...,
115119
convert_to_nullable_integer: bool = ...,
116120
dtype_if_all_nat: DtypeObj | None = ...,
117121
) -> ArrayLike: ...
@@ -125,6 +129,7 @@ def maybe_convert_objects(
125129
convert_datetime: bool = ...,
126130
convert_timedelta: bool = ...,
127131
convert_period: bool = ...,
132+
convert_interval: bool = ...,
128133
convert_to_nullable_integer: bool = ...,
129134
dtype_if_all_nat: DtypeObj | None = ...,
130135
) -> ArrayLike: ...
@@ -190,11 +195,7 @@ def maybe_indices_to_slice(
190195
max_len: int,
191196
) -> slice | np.ndarray: ... # np.ndarray[np.uint8]
192197

193-
def clean_index_list(obj: list) -> tuple[
194-
list | np.ndarray, # np.ndarray[object | np.int64 | np.uint64]
195-
bool,
196-
]: ...
197-
198+
def is_all_arraylike(obj: list) -> bool: ...
198199

199200
# -----------------------------------------------------------------
200201
# Functions which in reality take memoryviews

0 commit comments

Comments
 (0)