Skip to content

Commit 30aa926

Browse files
authored
Merge pull request #153 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 1bb0708 + 2700775 commit 30aa926

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+787
-868
lines changed

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ repos:
4747
types: [text]
4848
args: [--append-config=flake8/cython-template.cfg]
4949
- repo: https://github.com/PyCQA/isort
50-
rev: 5.7.0
50+
rev: 5.8.0
5151
hooks:
5252
- id: isort
5353
- repo: https://github.com/asottile/pyupgrade
54-
rev: v2.10.0
54+
rev: v2.11.0
5555
hooks:
5656
- id: pyupgrade
5757
args: [--py37-plus, --keep-runtime-typing]

asv_bench/benchmarks/io/style.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pandas import DataFrame
44

55

6-
class RenderApply:
6+
class Render:
77

88
params = [[12, 24, 36], [12, 120]]
99
param_names = ["cols", "rows"]
@@ -14,15 +14,21 @@ def setup(self, cols, rows):
1414
columns=[f"float_{i+1}" for i in range(cols)],
1515
index=[f"row_{i+1}" for i in range(rows)],
1616
)
17-
self._style_apply()
1817

19-
def time_render(self, cols, rows):
18+
def time_apply_render(self, cols, rows):
19+
self._style_apply()
2020
self.st.render()
2121

22-
def peakmem_apply(self, cols, rows):
22+
def peakmem_apply_render(self, cols, rows):
2323
self._style_apply()
24+
self.st.render()
2425

25-
def peakmem_render(self, cols, rows):
26+
def time_classes_render(self, cols, rows):
27+
self._style_classes()
28+
self.st.render()
29+
30+
def peakmem_classes_render(self, cols, rows):
31+
self._style_classes()
2632
self.st.render()
2733

2834
def _style_apply(self):
@@ -32,3 +38,8 @@ def _apply_func(s):
3238
]
3339

3440
self.st = self.df.style.apply(_apply_func, axis=1)
41+
42+
def _style_classes(self):
43+
classes = self.df.applymap(lambda v: ("cls-1" if v > 0 else ""))
44+
classes.index, classes.columns = self.df.index, self.df.columns
45+
self.st = self.df.style.set_td_classes(classes)

ci/code_checks.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ import sys
118118
import pandas
119119
120120
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
121-
'lxml', 'matplotlib', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
121+
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
122122
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
123123
124124
# GH#28227 for some of these check for top-level modules, while others are

doc/make.py

-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ def __init__(
5454

5555
if single_doc:
5656
single_doc = self._process_single_doc(single_doc)
57-
include_api = False
5857
os.environ["SPHINX_PATTERN"] = single_doc
5958
elif not include_api:
6059
os.environ["SPHINX_PATTERN"] = "-api"

doc/source/whatsnew/v1.2.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

1818
- Fixed regression in :meth:`DataFrame.sum` where passing a ``min_count`` greater than the :class:`DataFrame` shape resulted in a ``ValueError`` (:issue:`39738`)
19+
- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
1920
-
2021

2122
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Other enhancements
128128
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
129129
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
130130
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
131+
- :meth:`DataFrame.applymap` can now accept keyword arguments to pass on to ``func`` (:issue:`39987`)
131132
- Disallow :class:`DataFrame` indexer for ``iloc`` for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` (:issue:`39004`)
132133
- :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`)
133134
- :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`)

pandas/_libs/algos.pxd

+1-18
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,4 @@
11
from pandas._libs.util cimport numeric
22

33

4-
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
5-
cdef:
6-
numeric t
7-
8-
# cython doesn't allow pointer dereference so use array syntax
9-
t = a[0]
10-
a[0] = b[0]
11-
b[0] = t
12-
return 0
13-
14-
15-
cdef enum TiebreakEnumType:
16-
TIEBREAK_AVERAGE
17-
TIEBREAK_MIN,
18-
TIEBREAK_MAX
19-
TIEBREAK_FIRST
20-
TIEBREAK_FIRST_DESCENDING
21-
TIEBREAK_DENSE
4+
cdef numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil

pandas/_libs/algos.pyx

+72-23
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ cdef:
6464
float64_t NaN = <float64_t>np.NaN
6565
int64_t NPY_NAT = get_nat()
6666

67+
cdef enum TiebreakEnumType:
68+
TIEBREAK_AVERAGE
69+
TIEBREAK_MIN,
70+
TIEBREAK_MAX
71+
TIEBREAK_FIRST
72+
TIEBREAK_FIRST_DESCENDING
73+
TIEBREAK_DENSE
74+
6775
tiebreakers = {
6876
"average": TIEBREAK_AVERAGE,
6977
"min": TIEBREAK_MIN,
@@ -237,34 +245,75 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
237245
return indexer, counts
238246

239247

248+
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
249+
cdef:
250+
numeric t
251+
252+
# cython doesn't allow pointer dereference so use array syntax
253+
t = a[0]
254+
a[0] = b[0]
255+
b[0] = t
256+
return 0
257+
258+
259+
cdef inline numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil:
260+
"""
261+
See kth_smallest.__doc__. The additional parameter n specifies the maximum
262+
number of elements considered in arr, needed for compatibility with usage
263+
in groupby.pyx
264+
"""
265+
cdef:
266+
Py_ssize_t i, j, l, m
267+
numeric x
268+
269+
l = 0
270+
m = n - 1
271+
272+
while l < m:
273+
x = arr[k]
274+
i = l
275+
j = m
276+
277+
while 1:
278+
while arr[i] < x: i += 1
279+
while x < arr[j]: j -= 1
280+
if i <= j:
281+
swap(&arr[i], &arr[j])
282+
i += 1; j -= 1
283+
284+
if i > j: break
285+
286+
if j < k: l = i
287+
if k < i: m = j
288+
return arr[k]
289+
290+
240291
@cython.boundscheck(False)
241292
@cython.wraparound(False)
242-
def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
293+
def kth_smallest(numeric[::1] arr, Py_ssize_t k) -> numeric:
294+
"""
295+
Compute the kth smallest value in arr. Note that the input
296+
array will be modified.
297+
298+
Parameters
299+
----------
300+
arr : numeric[::1]
301+
Array to compute the kth smallest value for, must be
302+
contiguous
303+
k : Py_ssize_t
304+
305+
Returns
306+
-------
307+
numeric
308+
The kth smallest value in arr
309+
"""
243310
cdef:
244-
Py_ssize_t i, j, l, m, n = a.shape[0]
245-
numeric x
311+
numeric result
246312

247313
with nogil:
248-
l = 0
249-
m = n - 1
250-
251-
while l < m:
252-
x = a[k]
253-
i = l
254-
j = m
255-
256-
while 1:
257-
while a[i] < x: i += 1
258-
while x < a[j]: j -= 1
259-
if i <= j:
260-
swap(&a[i], &a[j])
261-
i += 1; j -= 1
262-
263-
if i > j: break
264-
265-
if j < k: l = i
266-
if k < i: m = j
267-
return a[k]
314+
result = kth_smallest_c(&arr[0], k, arr.shape[0])
315+
316+
return result
268317

269318

270319
# ----------------------------------------------------------------------

pandas/_libs/groupby.pyx

+2-31
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ from numpy.math cimport NAN
3030

3131
cnp.import_array()
3232

33-
from pandas._libs.algos cimport swap
33+
from pandas._libs.algos cimport kth_smallest_c
3434
from pandas._libs.util cimport (
3535
get_nat,
3636
numeric,
@@ -88,7 +88,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
8888
n -= na_count
8989

9090
if n % 2:
91-
result = kth_smallest_c( a, n // 2, n)
91+
result = kth_smallest_c(a, n // 2, n)
9292
else:
9393
result = (kth_smallest_c(a, n // 2, n) +
9494
kth_smallest_c(a, n // 2 - 1, n)) / 2
@@ -99,35 +99,6 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
9999
return result
100100

101101

102-
# TODO: Is this redundant with algos.kth_smallest
103-
cdef inline float64_t kth_smallest_c(float64_t* a,
104-
Py_ssize_t k,
105-
Py_ssize_t n) nogil:
106-
cdef:
107-
Py_ssize_t i, j, l, m
108-
float64_t x, t
109-
110-
l = 0
111-
m = n - 1
112-
while l < m:
113-
x = a[k]
114-
i = l
115-
j = m
116-
117-
while 1:
118-
while a[i] < x: i += 1
119-
while x < a[j]: j -= 1
120-
if i <= j:
121-
swap(&a[i], &a[j])
122-
i += 1; j -= 1
123-
124-
if i > j: break
125-
126-
if j < k: l = i
127-
if k < i: m = j
128-
return a[k]
129-
130-
131102
@cython.boundscheck(False)
132103
@cython.wraparound(False)
133104
def group_median_float64(ndarray[float64_t, ndim=2] out,

pandas/_libs/src/ujson/python/objToJSON.c

+11-12
Original file line numberDiff line numberDiff line change
@@ -272,18 +272,6 @@ static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) {
272272
return ret;
273273
}
274274

275-
static int is_simple_frame(PyObject *obj) {
276-
PyObject *check = get_sub_attr(obj, "_mgr", "is_mixed_type");
277-
int ret = (check == Py_False);
278-
279-
if (!check) {
280-
return 0;
281-
}
282-
283-
Py_DECREF(check);
284-
return ret;
285-
}
286-
287275
static Py_ssize_t get_attr_length(PyObject *obj, char *attr) {
288276
PyObject *tmp = PyObject_GetAttrString(obj, attr);
289277
Py_ssize_t ret;
@@ -301,6 +289,17 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) {
301289
return ret;
302290
}
303291

292+
static int is_simple_frame(PyObject *obj) {
293+
PyObject *mgr = PyObject_GetAttrString(obj, "_mgr");
294+
if (!mgr) {
295+
return 0;
296+
}
297+
int ret = (get_attr_length(mgr, "blocks") <= 1);
298+
299+
Py_DECREF(mgr);
300+
return ret;
301+
}
302+
304303
static npy_int64 get_long_attr(PyObject *o, const char *attr) {
305304
npy_int64 long_val;
306305
PyObject *value = PyObject_GetAttrString(o, attr);

0 commit comments

Comments
 (0)