Skip to content

Sync Fork from Upstream Repo #153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
b94a1ee
⬆️ UPGRADE: Autoupdate pre-commit config (#40564)
github-actions[bot] Mar 22, 2021
f841eda
TYP: nattype.pyi (#40503)
jbrockmendel Mar 22, 2021
a68d28c
CLN: make `cell_context` DefaultDict like `ctx` - simplify code (#40453)
attack68 Mar 22, 2021
d6fe5f0
CLN: remove FloatBlock, share _can_hold_na (#40526)
jbrockmendel Mar 22, 2021
796ecaa
CLN: remove unreachable quantile code (#40547)
jbrockmendel Mar 22, 2021
33680ab
REF: share external_values ArrayManager/BlockManager (#40529)
jbrockmendel Mar 22, 2021
e0ae6f2
CLN: a couple of minor cleanups (#40567)
simonjayhawkins Mar 22, 2021
0f5bd77
CLN: remove unused axis keyword from Block.where (#40561)
jbrockmendel Mar 22, 2021
c7c9c6b
CLN/PERF: remove unused out kwd in take_nd (#40510)
jbrockmendel Mar 22, 2021
ced764d
DEPR: use DeprecationWarning instead of FutureWarning for Categorical…
jorisvandenbossche Mar 22, 2021
a3ebb63
TST/CLN: remove redundant to_json test (#40577)
mzeitlin11 Mar 23, 2021
05a0e98
CLN: Optional[Hashable] in dict type hints (#40534)
topper-123 Mar 23, 2021
f115360
PERF: no need to check for DataFrame in pandas.core.computation.expre…
jorisvandenbossche Mar 23, 2021
23c9661
CLN: remove unused kwarg from IntervalIndex._searchsorted_monotonic (…
jbrockmendel Mar 23, 2021
1dab473
BUG: to_json failing on PyPy (#40525)
mzeitlin11 Mar 23, 2021
79f1801
COMPAT: add back dummy CategoricalBlock class (#40582)
jorisvandenbossche Mar 23, 2021
86d5980
REF/PERF: deduplicate kth_smallest (#40559)
mzeitlin11 Mar 23, 2021
4e0d0b4
ENH: applymap get kwargs #39987 (#40562)
alexprincel Mar 23, 2021
7e89bb8
CLN: Remove unnecessary assignment (#40553)
hasan-yaman Mar 23, 2021
c4b8624
TST: `test_highlight.py` convert to functional tests not class (#40551)
attack68 Mar 23, 2021
e56403f
TST: `test_tooltip.py` convert to functional tests instead of class (…
attack68 Mar 23, 2021
b118e5b
TYP: first_valid_index & last_valid_index (#40535)
topper-123 Mar 23, 2021
5467c5c
TYP: io.sas (#40524)
jbrockmendel Mar 23, 2021
9535280
REF: move shift logic from BlockManager to DataFrame (#40536)
jbrockmendel Mar 23, 2021
2700775
REF: move roperators to pandas.core (#40444)
jorisvandenbossche Mar 23, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ repos:
types: [text]
args: [--append-config=flake8/cython-template.cfg]
- repo: https://github.com/PyCQA/isort
rev: 5.7.0
rev: 5.8.0
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v2.10.0
rev: v2.11.0
hooks:
- id: pyupgrade
args: [--py37-plus, --keep-runtime-typing]
Expand Down
21 changes: 16 additions & 5 deletions asv_bench/benchmarks/io/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pandas import DataFrame


class RenderApply:
class Render:

params = [[12, 24, 36], [12, 120]]
param_names = ["cols", "rows"]
Expand All @@ -14,15 +14,21 @@ def setup(self, cols, rows):
columns=[f"float_{i+1}" for i in range(cols)],
index=[f"row_{i+1}" for i in range(rows)],
)
self._style_apply()

def time_render(self, cols, rows):
def time_apply_render(self, cols, rows):
self._style_apply()
self.st.render()

def peakmem_apply(self, cols, rows):
def peakmem_apply_render(self, cols, rows):
self._style_apply()
self.st.render()

def peakmem_render(self, cols, rows):
def time_classes_render(self, cols, rows):
self._style_classes()
self.st.render()

def peakmem_classes_render(self, cols, rows):
self._style_classes()
self.st.render()

def _style_apply(self):
Expand All @@ -32,3 +38,8 @@ def _apply_func(s):
]

self.st = self.df.style.apply(_apply_func, axis=1)

def _style_classes(self):
classes = self.df.applymap(lambda v: ("cls-1" if v > 0 else ""))
classes.index, classes.columns = self.df.index, self.df.columns
self.st = self.df.style.set_td_classes(classes)
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ import sys
import pandas

blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
'lxml', 'matplotlib', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}

# GH#28227 for some of these check for top-level modules, while others are
Expand Down
1 change: 0 additions & 1 deletion doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def __init__(

if single_doc:
single_doc = self._process_single_doc(single_doc)
include_api = False
os.environ["SPHINX_PATTERN"] = single_doc
elif not include_api:
os.environ["SPHINX_PATTERN"] = "-api"
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Fixed regressions
~~~~~~~~~~~~~~~~~

- Fixed regression in :meth:`DataFrame.sum` where passing a ``min_count`` greater than the :class:`DataFrame` shape resulted in a ``ValueError`` (:issue:`39738`)
- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
-

.. ---------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ Other enhancements
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
- :meth:`DataFrame.applymap` can now accept kwargs to pass on to func (:issue:`39987`)
- Disallow :class:`DataFrame` indexer for ``iloc`` for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__`, (:issue:`39004`)
- :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`)
- :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`)
Expand Down
19 changes: 1 addition & 18 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,21 +1,4 @@
from pandas._libs.util cimport numeric


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
    # Exchange the values stored at ``a`` and ``b``. The return value is
    # always 0.
    cdef:
        numeric held

    # Cython has no pointer-dereference operator, hence the ``[0]`` indexing.
    held = b[0]
    b[0] = a[0]
    a[0] = held
    return 0


# Enumeration of tie-breaking modes, one member per mode.
# NOTE(review): presumably these select how tied values are handled by the
# code that consumes them -- confirm against the call sites.
cdef enum TiebreakEnumType:
TIEBREAK_AVERAGE
# NOTE(review): the trailing comma on the next member is inconsistent with
# its siblings.
TIEBREAK_MIN,
TIEBREAK_MAX
TIEBREAK_FIRST
TIEBREAK_FIRST_DESCENDING
TIEBREAK_DENSE
cdef numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil
95 changes: 72 additions & 23 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ cdef:
float64_t NaN = <float64_t>np.NaN
int64_t NPY_NAT = get_nat()

# Enumeration of tie-breaking modes, one member per mode. The module-level
# ``tiebreakers`` dict maps string names (e.g. "average", "min") to these
# members.
cdef enum TiebreakEnumType:
TIEBREAK_AVERAGE
# NOTE(review): the trailing comma on the next member is inconsistent with
# its siblings.
TIEBREAK_MIN,
TIEBREAK_MAX
TIEBREAK_FIRST
TIEBREAK_FIRST_DESCENDING
TIEBREAK_DENSE

tiebreakers = {
"average": TIEBREAK_AVERAGE,
"min": TIEBREAK_MIN,
Expand Down Expand Up @@ -237,34 +245,75 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
return indexer, counts


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
    # Exchange the values stored at ``a`` and ``b``. The return value is
    # always 0.
    cdef:
        numeric held

    # Cython has no pointer-dereference operator, hence the ``[0]`` indexing.
    held = b[0]
    b[0] = a[0]
    a[0] = held
    return 0


cdef inline numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil:
"""
Return the value found at index k after partially reordering the first
n elements of arr in place.

See kth_smallest.__doc__. The additional parameter n specifies the maximum
number of elements considered in arr, needed for compatibility with usage
in groupby.pyx

Parameters
----------
arr : numeric*
    Buffer to select from; its elements are swapped in place.
k : Py_ssize_t
    Index of the value to return.
    NOTE(review): k is not range-checked here -- presumably callers
    guarantee 0 <= k < n.
n : Py_ssize_t
    Number of leading elements of arr that take part in the search.

Returns
-------
numeric
    arr[k] once the narrowing loop has finished.
"""
cdef:
Py_ssize_t i, j, l, m
numeric x

# [l, m] is the window still being narrowed; it starts as the first n
# elements.
l = 0
m = n - 1

while l < m:
# Partition the current window around the value currently stored at index k.
x = arr[k]
i = l
j = m

while 1:
# Move i right past values below the pivot and j left past values above it.
while arr[i] < x: i += 1
while x < arr[j]: j -= 1
if i <= j:
# Exchange the out-of-place pair, then step both cursors inward.
swap(&arr[i], &arr[j])
i += 1; j -= 1

# The cursors have crossed: this partition pass is done.
if i > j: break

# Keep only the side of the window that still contains index k.
if j < k: l = i
if k < i: m = j
return arr[k]


@cython.boundscheck(False)
@cython.wraparound(False)
def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
def kth_smallest(numeric[::1] arr, Py_ssize_t k) -> numeric:
"""
Compute the kth smallest value in arr. Note that the input
array will be modified.

Parameters
----------
arr : numeric[::1]
Array to compute the kth smallest value for, must be
contiguous
k : Py_ssize_t

Returns
-------
numeric
The kth smallest value in arr
"""
cdef:
Py_ssize_t i, j, l, m, n = a.shape[0]
numeric x
numeric result

with nogil:
l = 0
m = n - 1

while l < m:
x = a[k]
i = l
j = m

while 1:
while a[i] < x: i += 1
while x < a[j]: j -= 1
if i <= j:
swap(&a[i], &a[j])
i += 1; j -= 1

if i > j: break

if j < k: l = i
if k < i: m = j
return a[k]
result = kth_smallest_c(&arr[0], k, arr.shape[0])

return result


# ----------------------------------------------------------------------
Expand Down
33 changes: 2 additions & 31 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ from numpy.math cimport NAN

cnp.import_array()

from pandas._libs.algos cimport swap
from pandas._libs.algos cimport kth_smallest_c
from pandas._libs.util cimport (
get_nat,
numeric,
Expand Down Expand Up @@ -88,7 +88,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
n -= na_count

if n % 2:
result = kth_smallest_c( a, n // 2, n)
result = kth_smallest_c(a, n // 2, n)
else:
result = (kth_smallest_c(a, n // 2, n) +
kth_smallest_c(a, n // 2 - 1, n)) / 2
Expand All @@ -99,35 +99,6 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
return result


# TODO: Is this redundant with algos.kth_smallest
cdef inline float64_t kth_smallest_c(float64_t* a,
                                     Py_ssize_t k,
                                     Py_ssize_t n) nogil:
    """
    Return the value found at index k after partially reordering the first
    n elements of a in place.

    Parameters
    ----------
    a : float64_t*
        Buffer to select from; its elements are swapped in place.
    k : Py_ssize_t
        Index of the value to return.
        NOTE(review): k is not range-checked here -- presumably callers
        guarantee 0 <= k < n.
    n : Py_ssize_t
        Number of leading elements of a that take part in the search.

    Returns
    -------
    float64_t
        a[k] once the narrowing loop has finished.
    """
    cdef:
        Py_ssize_t i, j, l, m
        float64_t x

    # [l, m] is the window still being narrowed; it starts as the first n
    # elements.
    l = 0
    m = n - 1
    while l < m:
        # Partition the current window around the value stored at index k.
        x = a[k]
        i = l
        j = m

        while 1:
            # Move i right past values below the pivot and j left past
            # values above it.
            while a[i] < x: i += 1
            while x < a[j]: j -= 1
            if i <= j:
                # Exchange the out-of-place pair, then step both cursors
                # inward.
                swap(&a[i], &a[j])
                i += 1; j -= 1

            # The cursors have crossed: this partition pass is done.
            if i > j: break

        # Keep only the side of the window that still contains index k.
        if j < k: l = i
        if k < i: m = j
    return a[k]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
Expand Down
23 changes: 11 additions & 12 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,18 +272,6 @@ static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) {
return ret;
}

/*
 * Return 1 when the "is_mixed_type" value obtained through obj's "_mgr"
 * attribute is the Py_False singleton, and 0 otherwise. A failed lookup
 * (NULL from get_sub_attr) also yields 0.
 */
static int is_simple_frame(PyObject *obj) {
    PyObject *mixed = get_sub_attr(obj, "_mgr", "is_mixed_type");
    int simple;

    if (mixed == NULL) {
        return 0;
    }

    /* Identity comparison against the False singleton, as in the original. */
    simple = (mixed == Py_False);
    Py_DECREF(mixed);
    return simple;
}

static Py_ssize_t get_attr_length(PyObject *obj, char *attr) {
PyObject *tmp = PyObject_GetAttrString(obj, attr);
Py_ssize_t ret;
Expand All @@ -301,6 +289,17 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) {
return ret;
}

/*
 * Return 1 when get_attr_length reports at most one entry for the "blocks"
 * attribute of obj's "_mgr", and 0 otherwise. If "_mgr" cannot be fetched
 * the result is 0.
 */
static int is_simple_frame(PyObject *obj) {
    int simple = 0;
    PyObject *mgr = PyObject_GetAttrString(obj, "_mgr");

    if (mgr != NULL) {
        simple = (get_attr_length(mgr, "blocks") <= 1);
        /* Release the reference obtained from PyObject_GetAttrString. */
        Py_DECREF(mgr);
    }
    return simple;
}

static npy_int64 get_long_attr(PyObject *o, const char *attr) {
npy_int64 long_val;
PyObject *value = PyObject_GetAttrString(o, attr);
Expand Down
Loading