
Commit c81dd2e (merge, 2 parents: da0b51e + 8cbee35)

Merge branch 'master' of https://github.com/pandas-dev/pandas into libwriters


58 files changed (+778 / -1802 lines)

doc/source/whatsnew/v0.23.0.txt

Lines changed: 10 additions & 2 deletions
@@ -380,6 +380,7 @@ Performance Improvements
 - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
 - :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
 - Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
+- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)


 .. _whatsnew_0230.docs:
@@ -431,6 +432,7 @@ Timezones
 - :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)
 - Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`)
 - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
+- Bug in the :class:`DataFrame` constructor, where a tz-aware :class:`DatetimeIndex` and a given column name would result in an empty ``DataFrame`` (:issue:`19157`)

 Offsets
 ^^^^^^^
@@ -476,7 +478,11 @@ MultiIndex
 - Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`)
 - Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)
 - Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in Python 3 (:issue:`18434`)
--
+- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`)
+- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`)
+- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`)
+- Bug in :func:`MultiIndex.get_loc` in a large :class:`MultiIndex`, which would fail when levels had different dtypes (:issue:`18520`)
+

 I/O
 ^^^
@@ -489,6 +495,8 @@ I/O
 - Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`)
 - Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`)
 - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`)
+- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`)
+- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for the xls file type (:issue:`19242`, :issue:`9155`)
 -

 Plotting
@@ -512,7 +520,7 @@ Groupby/Resample/Rolling
 Sparse
 ^^^^^^

--
+- Bug where creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 -
 -
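
For context, a short usage sketch of the :func:`MultiIndex.get_loc` behaviour described by the entries above (not part of the diff; the index values and the expected positions are illustrative assumptions):

import numpy as np
import pandas as pd

# Levels with different dtypes: int and float (GH 18520).
mi = pd.MultiIndex.from_arrays([[1, 2, 3], [1.5, 2.5, np.nan]])

# Float and int keys are cast to the level dtype automatically (GH 18818, GH 15994).
mi.get_loc((2, 2.5))     # expected: 1
mi.get_loc((2.0, 2.5))   # expected: 1, float key against the int level

# Keys containing NaN are located instead of raising (GH 18485).
mi.get_loc((3, np.nan))  # expected: 2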

pandas/_libs/algos.pyx

Lines changed: 10 additions & 15 deletions
@@ -1,39 +1,34 @@
 # cython: profile=False

-cimport numpy as np
-import numpy as np
-
 cimport cython
 from cython cimport Py_ssize_t

-np.import_array()
-
-cdef float64_t FP_ERR = 1e-13
-
-cimport util
-
 from libc.stdlib cimport malloc, free
 from libc.string cimport memmove
+from libc.math cimport fabs, sqrt

+import numpy as np
+cimport numpy as cnp
 from numpy cimport (ndarray,
                     NPY_INT64, NPY_UINT64, NPY_INT32, NPY_INT16, NPY_INT8,
                     NPY_FLOAT32, NPY_FLOAT64,
                     NPY_OBJECT,
                     int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
                     uint32_t, uint64_t, float32_t, float64_t,
                     double_t)
+cnp.import_array()


-cdef double NaN = <double> np.NaN
-cdef double nan = NaN
-
-from libc.math cimport fabs, sqrt
-
-# this is our util.pxd
+cimport util
 from util cimport numeric, get_nat

 import missing

+cdef float64_t FP_ERR = 1e-13
+
+cdef double NaN = <double> np.NaN
+cdef double nan = NaN
+
 cdef int64_t iNaT = get_nat()

 cdef:

pandas/_libs/algos_rank_helper.pxi.in

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,

         ndarray[float64_t] ranks
         ndarray[int64_t] argsorted
-        ndarray[np.uint8_t, cast=True] sorted_mask
+        ndarray[uint8_t, cast=True] sorted_mask

         {{if dtype == 'uint64'}}
         {{ctype}} val

pandas/_libs/hashtable.pxd

Lines changed: 0 additions & 9 deletions
@@ -31,15 +31,6 @@ cdef class PyObjectHashTable(HashTable):
     cpdef get_item(self, object val)
     cpdef set_item(self, object key, Py_ssize_t val)

-cdef class MultiIndexHashTable(HashTable):
-    cdef:
-        kh_uint64_t *table
-        object mi
-
-    cpdef get_item(self, object val)
-    cpdef set_item(self, object key, Py_ssize_t val)
-    cdef inline void _check_for_collision(self, Py_ssize_t loc, object label)
-

 cdef class StringHashTable(HashTable):
     cdef kh_str_t *table

pandas/_libs/hashtable.pyx

Lines changed: 19 additions & 19 deletions
@@ -1,6 +1,22 @@
 # cython: profile=False

-from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check
+cimport cython
+
+from cpython cimport (PyObject, Py_INCREF, PyList_Check, PyTuple_Check,
+                      PyMem_Malloc, PyMem_Realloc, PyMem_Free,
+                      PyString_Check, PyBytes_Check,
+                      PyUnicode_Check)
+
+from libc.stdlib cimport malloc, free
+
+import numpy as np
+cimport numpy as cnp
+from numpy cimport ndarray, uint8_t, uint32_t
+cnp.import_array()
+
+cdef extern from "numpy/npy_math.h":
+    double NAN "NPY_NAN"
+

 from khash cimport (
     khiter_t,
@@ -23,29 +39,13 @@ from khash cimport (
     kh_put_pymap, kh_resize_pymap)


-from numpy cimport ndarray, uint8_t, uint32_t
-
-from libc.stdlib cimport malloc, free
-from cpython cimport (PyMem_Malloc, PyMem_Realloc, PyMem_Free,
-                      PyString_Check, PyBytes_Check,
-                      PyUnicode_Check)
-
 from util cimport _checknan
 cimport util

-import numpy as np
-nan = np.nan
-
-cdef extern from "numpy/npy_math.h":
-    double NAN "NPY_NAN"
-
-cimport cython
-cimport numpy as cnp
-
 from missing cimport checknull

-cnp.import_array()
-cnp.import_ufunc()
+
+nan = np.nan

 cdef int64_t iNaT = util.get_nat()
 _SIZE_HINT_LIMIT = (1 << 20) + 7

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 0 additions & 136 deletions
@@ -899,139 +899,3 @@ cdef class PyObjectHashTable(HashTable):
                 count += 1

         return np.asarray(labels)
-
-
-cdef class MultiIndexHashTable(HashTable):
-
-    def __init__(self, size_hint=1):
-        self.table = kh_init_uint64()
-        self.mi = None
-        kh_resize_uint64(self.table, size_hint)
-
-    def __dealloc__(self):
-        if self.table is not NULL:
-            kh_destroy_uint64(self.table)
-            self.table = NULL
-
-    def __len__(self):
-        return self.table.size
-
-    def sizeof(self, deep=False):
-        """ return the size of my table in bytes """
-        return self.table.n_buckets * (sizeof(uint64_t) + # keys
-                                       sizeof(size_t) + # vals
-                                       sizeof(uint32_t)) # flags
-
-    def _check_for_collisions(self, int64_t[:] locs, object mi):
-        # validate that the locs map to the actual values
-        # provided in the mi
-        # we can only check if we *don't* have any missing values
-        # :<
-        cdef:
-            ndarray[int64_t] alocs
-
-        alocs = np.asarray(locs)
-        if (alocs != -1).all():
-
-            result = self.mi.take(locs)
-            if isinstance(mi, tuple):
-                from pandas import Index
-                mi = Index([mi])
-            if not result.equals(mi):
-                raise AssertionError(
-                    "hash collision\nlocs:\n{}\n"
-                    "result:\n{}\nmi:\n{}".format(alocs, result, mi))
-
-    cdef inline void _check_for_collision(self, Py_ssize_t loc, object label):
-        # validate that the loc maps to the actual value
-        # version of _check_for_collisions above for single label (tuple)
-
-        result = self.mi[loc]
-
-        if not all(l == r or (is_null_datetimelike(l)
-                              and is_null_datetimelike(r))
-                   for l, r in zip(result, label)):
-            raise AssertionError(
-                "hash collision\nloc:\n{}\n"
-                "result:\n{}\nmi:\n{}".format(loc, result, label))
-
-    def __contains__(self, object key):
-        try:
-            self.get_item(key)
-            return True
-        except (KeyError, ValueError, TypeError):
-            return False
-
-    cpdef get_item(self, object key):
-        cdef:
-            khiter_t k
-            uint64_t value
-            int64_t[:] locs
-            Py_ssize_t loc
-
-        value = self.mi._hashed_indexing_key(key)
-        k = kh_get_uint64(self.table, value)
-        if k != self.table.n_buckets:
-            loc = self.table.vals[k]
-            self._check_for_collision(loc, key)
-            return loc
-        else:
-            raise KeyError(key)
-
-    cpdef set_item(self, object key, Py_ssize_t val):
-        raise NotImplementedError
-
-    @cython.boundscheck(False)
-    def map_locations(self, object mi):
-        cdef:
-            Py_ssize_t i, n
-            ndarray[uint64_t] values
-            uint64_t val
-            int ret = 0
-            khiter_t k
-
-        self.mi = mi
-        n = len(mi)
-        values = mi._hashed_values
-
-        with nogil:
-            for i in range(n):
-                val = values[i]
-                k = kh_put_uint64(self.table, val, &ret)
-                self.table.vals[k] = i
-
-    @cython.boundscheck(False)
-    def lookup(self, object mi):
-        # look up with a target mi
-        cdef:
-            Py_ssize_t i, n
-            ndarray[uint64_t] values
-            int ret = 0
-            uint64_t val
-            khiter_t k
-            int64_t[:] locs
-
-        n = len(mi)
-        values = mi._hashed_values
-
-        locs = np.empty(n, dtype=np.int64)
-
-        with nogil:
-            for i in range(n):
-                val = values[i]
-                k = kh_get_uint64(self.table, val)
-                if k != self.table.n_buckets:
-                    locs[i] = self.table.vals[k]
-                else:
-                    locs[i] = -1
-
-        self._check_for_collisions(locs, mi)
-        return np.asarray(locs)
-
-    def unique(self, object mi):
-        raise NotImplementedError
-
-    def get_labels(self, object mi, ObjectVector uniques,
-                   Py_ssize_t count_prior, int64_t na_sentinel,
-                   bint check_null=True):
-        raise NotImplementedError
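
For readers skimming the removal above: MultiIndexHashTable mapped the uint64 hash of each MultiIndex entry to its integer position and re-checked the actual labels on lookup to guard against hash collisions. A rough pure-Python sketch of that hash-then-verify idea follows (illustrative only; HashedTupleTable and its use of Python's built-in hash stand in for the khash table and pandas' internal _hashed_indexing_key):

class HashedTupleTable(object):
    """Toy model of the removed scheme: store hash(key) -> position,
    then verify the stored labels on lookup to catch hash collisions."""

    def __init__(self, keys):
        self._keys = list(keys)
        # hash(...) stands in for the uint64 hashing done in pandas
        self._table = {hash(k): i for i, k in enumerate(self._keys)}

    def get_item(self, key):
        try:
            loc = self._table[hash(key)]
        except KeyError:
            raise KeyError(key)
        # collision check, mirroring _check_for_collision above
        if self._keys[loc] != key:
            raise AssertionError("hash collision at loc {}".format(loc))
        return loc


table = HashedTupleTable([("a", 1), ("a", 2), ("b", 1)])
table.get_item(("a", 2))  # -> 1

The replacement indexing engine is not part of the excerpt shown here; the sketch only illustrates the scheme that the removed class implemented.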
