
Commit 5d7e3ea

Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
2 parents: ea75c3c + d3f7d2a


72 files changed (+1763 / -1667 lines)

ci/lint.sh (+15)

@@ -30,6 +30,13 @@ if [ "$LINT" ]; then
     fi
     echo "Linting asv_bench/benchmarks/*.py DONE"

+    echo "Linting scripts/*.py"
+    flake8 scripts --filename=*.py
+    if [ $? -ne "0" ]; then
+        RET=1
+    fi
+    echo "Linting scripts/*.py DONE"
+
     echo "Linting *.pyx"
     flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403
     if [ $? -ne "0" ]; then

@@ -89,6 +96,14 @@ if [ "$LINT" ]; then
     if [ $? = "0" ]; then
         RET=1
     fi
+
+    # Check for pytest.warns
+    grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/
+
+    if [ $? = "0" ]; then
+        RET=1
+    fi
+
     echo "Check for invalid testing DONE"

     # Check for imports from pandas.core.common instead
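
Note: the new grep rule fails the lint step whenever ``pytest.warns`` appears under pandas/tests/. A minimal sketch of the intended replacement pattern, assuming pandas' in-house ``tm.assert_produces_warning`` context manager is the preferred alternative (that preference is inferred, not stated in the diff):

    import warnings
    import pandas.util.testing as tm

    # Instead of `with pytest.warns(FutureWarning): ...`, which the grep
    # check above would now flag, use the pandas testing helper:
    with tm.assert_produces_warning(FutureWarning):
        warnings.warn("this API is deprecated", FutureWarning)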

doc/source/api.rst (-1)

@@ -1617,7 +1617,6 @@ IntervalIndex Components
    IntervalIndex.from_arrays
    IntervalIndex.from_tuples
    IntervalIndex.from_breaks
-   IntervalIndex.from_intervals
    IntervalIndex.contains
    IntervalIndex.left
    IntervalIndex.right

doc/source/whatsnew/v0.23.0.txt (+5, -2)

@@ -235,9 +235,8 @@ Other Enhancements
   :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas
   to register custom accessors like ``.cat`` on pandas objects. See
   :ref:`Registering Custom Accessors <developer.register-accessors>` for more (:issue:`14781`).
-
-
 - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
+- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)

 .. _whatsnew_0230.api_breaking:


@@ -357,6 +356,7 @@ Deprecations
 - ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`).
 - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`)
 - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`).
+- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)


 .. _whatsnew_0230.prior_deprecations:

@@ -407,6 +407,8 @@ Performance Improvements
 - Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`)
 - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
 - :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
+- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
+

 .. _whatsnew_0230.docs:


@@ -532,6 +534,7 @@ Groupby/Resample/Rolling
 - Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`)
 - Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
 - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
+- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
 -

 Sparse
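
For context, a minimal usage sketch of the ``IntervalIndex`` changes noted above. It is illustrative only; the exact dtype spelling accepted by the constructors is an assumption based on the ``IntervalDtype`` string form:

    import pandas as pd

    # Deprecated: pd.IntervalIndex.from_intervals([...])
    # Preferred: pass Interval objects straight to the constructor.
    idx = pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(1, 2)])

    # The from_* constructors now take a ``dtype`` argument, e.g. forcing a
    # float64 subtype from integer breaks (dtype spelling assumed):
    idx2 = pd.IntervalIndex.from_breaks([0, 1, 2, 3], dtype='interval[float64]')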

pandas/_libs/algos.pyx (-18)

@@ -196,24 +196,6 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil:
     return a[k]


-cpdef numeric median(numeric[:] arr):
-    """
-    A faster median
-    """
-    cdef Py_ssize_t n = arr.size
-
-    if n == 0:
-        return np.NaN
-
-    arr = arr.copy()
-
-    if n % 2:
-        return kth_smallest(arr, n // 2)
-    else:
-        return (kth_smallest(arr, n // 2) +
-                kth_smallest(arr, n // 2 - 1)) / 2
-
-
 # ----------------------------------------------------------------------
 # Pairwise correlation/covariance
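
The deleted ``median`` helper selected the k-th smallest element(s) rather than fully sorting. A pure-NumPy sketch of the same logic is given below for reference only; it is not how pandas computes medians, and ``np.partition`` stands in for the Cython ``kth_smallest``:

    import numpy as np

    def median_via_selection(arr):
        # Mirrors the removed Cython helper: NaN for empty input, the middle
        # element for odd length, the mean of the two middle elements otherwise.
        arr = np.asarray(arr, dtype=np.float64)
        n = arr.size
        if n == 0:
            return np.nan
        if n % 2:
            return np.partition(arr, n // 2)[n // 2]
        upper = np.partition(arr, n // 2)[n // 2]
        lower = np.partition(arr, n // 2 - 1)[n // 2 - 1]
        return (upper + lower) / 2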

pandas/_libs/groupby.pyx (+1, -1)

@@ -118,7 +118,7 @@ def group_last_object(ndarray[object, ndim=2] out,
                 out[i, j] = resx[i, j]


-cdef inline float64_t _median_linear(float64_t* a, int n) nogil:
+cdef inline float64_t median_linear(float64_t* a, int n) nogil:
     cdef int i, j, na_count = 0
     cdef float64_t result
     cdef float64_t* tmp

pandas/_libs/groupby_helper.pxi.in (+1, -1)

@@ -740,7 +740,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
         ptr += _counts[0]
         for j in range(ngroups):
             size = _counts[j + 1]
-            out[j, i] = _median_linear(ptr, size)
+            out[j, i] = median_linear(ptr, size)
             ptr += size

pandas/_libs/index.pyx (+2, -18)

@@ -73,10 +73,6 @@ cpdef object get_value_box(ndarray arr, object loc):
     return util.get_value_1d(arr, i)


-def set_value_at(ndarray arr, object loc, object val):
-    return util.set_value_at(arr, loc, val)
-
-
 # Don't populate hash tables in monotonic indexes larger than this
 _SIZE_CUTOFF = 1000000


@@ -404,18 +400,6 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
     else:
         return mid + 1

-_pad_functions = {
-    'object': algos.pad_object,
-    'int64': algos.pad_int64,
-    'float64': algos.pad_float64
-}
-
-_backfill_functions = {
-    'object': algos.backfill_object,
-    'int64': algos.backfill_int64,
-    'float64': algos.backfill_float64
-}
-

 cdef class DatetimeEngine(Int64Engine):


@@ -566,7 +550,7 @@ cpdef convert_scalar(ndarray arr, object value):
     # we don't turn bools into int/float/complex

     if arr.descr.type_num == NPY_DATETIME:
-        if isinstance(value, np.ndarray):
+        if util.is_array(value):
             pass
         elif isinstance(value, (datetime, np.datetime64, date)):
             return Timestamp(value).value

@@ -577,7 +561,7 @@ cpdef convert_scalar(ndarray arr, object value):
             raise ValueError("cannot set a Timestamp with a non-timestamp")

     elif arr.descr.type_num == NPY_TIMEDELTA:
-        if isinstance(value, np.ndarray):
+        if util.is_array(value):
             pass
         elif isinstance(value, timedelta):
             return Timedelta(value).value
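
The ``convert_scalar`` change swaps a Python-level ``isinstance(value, np.ndarray)`` for ``util.is_array(value)``, which appears to be pandas' C-level ndarray check, so the accepted inputs should be unchanged. A rough Python-level equivalent of the condition, for illustration only:

    import numpy as np

    def is_array(obj):
        # Approximation of the C-level check: True for ndarray instances,
        # including subclasses, without converting the input.
        return isinstance(obj, np.ndarray)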

pandas/_libs/internals.pyx (+4, -3)

@@ -4,6 +4,7 @@ cimport cython
 from cython cimport Py_ssize_t

 from cpython cimport PyObject
+from cpython.slice cimport PySlice_Check

 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX

@@ -32,7 +33,7 @@ cdef class BlockPlacement:
         self._has_slice = False
         self._has_array = False

-        if isinstance(val, slice):
+        if PySlice_Check(val):
             slc = slice_canonize(val)

             if slc.start != slc.stop:

@@ -118,7 +119,7 @@ cdef class BlockPlacement:
         else:
             val = self._as_array[loc]

-        if not isinstance(val, slice) and val.ndim == 0:
+        if not PySlice_Check(val) and val.ndim == 0:
             return val

         return BlockPlacement(val)

@@ -288,7 +289,7 @@ def slice_getitem(slice slc not None, ind):

     s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)

-    if isinstance(ind, slice):
+    if PySlice_Check(ind):
         ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
                                                                       s_len)


pandas/_libs/interval.pyx (+1)

@@ -109,6 +109,7 @@ cdef class Interval(IntervalMixin):
     cut, qcut : Convert arrays of continuous data into Categoricals/Series of
         Interval.
     """
+    _typ = "interval"

     cdef readonly object left
     """Left bound for the interval"""

pandas/_libs/lib.pyx (+1, -10)

@@ -17,8 +17,6 @@ from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM,
 np.import_array()
 np.import_ufunc()

-from libc.stdlib cimport malloc, free
-
 from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
                       PyList_Check, PyFloat_Check,
                       PyString_Check,

@@ -27,8 +25,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
                       PyTuple_New,
                       PyObject_RichCompareBool,
                       PyBytes_GET_SIZE,
-                      PyUnicode_GET_SIZE,
-                      PyObject)
+                      PyUnicode_GET_SIZE)

 try:
     from cpython cimport PyString_GET_SIZE

@@ -37,19 +34,13 @@ except ImportError:

 cimport cpython

-isnan = np.isnan
-cdef double NaN = <double> np.NaN
-cdef double nan = NaN

 from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
                                PyTime_Check, PyDelta_Check,
                                PyDateTime_IMPORT)
 PyDateTime_IMPORT

-from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value
-
 from tslib import NaT, Timestamp, Timedelta, array_to_datetime
-from interval import Interval
 from missing cimport checknull

