Skip to content

Commit fcfd8bc

Browse files
Merge branch 'master' into pandas-devGH-21980
2 parents ace5b6b + 9122952 commit fcfd8bc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+1306
-860
lines changed

ci/doctests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
2121

2222
# DataFrame / Series docstrings
2323
pytest --doctest-modules -v pandas/core/frame.py \
24-
-k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_records -to_stata -transform"
24+
-k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform"
2525

2626
if [ $? -ne "0" ]; then
2727
RET=1

ci/lint.sh

+5-5
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ if [ "$LINT" ]; then
2020

2121
# pandas/_libs/src is C code, so no need to search there.
2222
echo "Linting *.py"
23-
flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,C410,E402,E731,E741,W503
23+
flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503
2424
if [ $? -ne "0" ]; then
2525
RET=1
2626
fi
2727
echo "Linting *.py DONE"
2828

2929
echo "Linting setup.py"
30-
flake8 setup.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
30+
flake8 setup.py --ignore=E402,E731,E741,W503
3131
if [ $? -ne "0" ]; then
3232
RET=1
3333
fi
@@ -41,21 +41,21 @@ if [ "$LINT" ]; then
4141
echo "Linting asv_bench/benchmarks/*.py DONE"
4242

4343
echo "Linting scripts/*.py"
44-
flake8 scripts --filename=*.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
44+
flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503
4545
if [ $? -ne "0" ]; then
4646
RET=1
4747
fi
4848
echo "Linting scripts/*.py DONE"
4949

5050
echo "Linting doc scripts"
51-
flake8 doc/make.py doc/source/conf.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
51+
flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503
5252
if [ $? -ne "0" ]; then
5353
RET=1
5454
fi
5555
echo "Linting doc scripts DONE"
5656

5757
echo "Linting *.pyx"
58-
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C407,C411
58+
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
5959
if [ $? -ne "0" ]; then
6060
RET=1
6161
fi

ci/travis-27.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ dependencies:
2929
- PyCrypto
3030
- pymysql=0.6.3
3131
- pytables
32+
- blosc=1.14.3
3233
- python-blosc
3334
- python-dateutil=2.5.0
3435
- python=2.7*

ci/travis-36-doc.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dependencies:
3636
- sphinx
3737
- sqlalchemy
3838
- statsmodels
39+
- tzlocal
3940
- xarray
4041
- xlrd
4142
- xlsxwriter

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -2352,6 +2352,7 @@ Computations / Descriptive Stats
23522352
Resampler.std
23532353
Resampler.sum
23542354
Resampler.var
2355+
Resampler.quantile
23552356

23562357
Style
23572358
-----

doc/source/groupby.rst

+4-6
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,8 @@ consider the following ``DataFrame``:
106106
.. versionadded:: 0.20
107107

108108
A string passed to ``groupby`` may refer to either a column or an index level.
109-
If a string matches both a column name and an index level name then a warning is
110-
issued and the column takes precedence. This will result in an ambiguity error
111-
in a future version.
109+
If a string matches both a column name and an index level name, a
110+
``ValueError`` will be raised.
112111

113112
.. ipython:: python
114113
@@ -389,7 +388,7 @@ This is mainly syntactic sugar for the alternative and much more verbose:
389388
Additionally this method avoids recomputing the internal grouping information
390389
derived from the passed key.
391390

392-
.. _groupby.iterating:
391+
.. _groupby.iterating-label:
393392

394393
Iterating through groups
395394
------------------------
@@ -415,8 +414,7 @@ In the case of grouping by multiple keys, the group name will be a tuple:
415414
...: print(group)
416415
...:
417416

418-
It's standard Python-fu but remember you can unpack the tuple in the for loop
419-
statement if you wish: ``for (k1, k2), group in grouped:``.
417+
See :ref:`timeseries.iterating-label`.
420418

421419
Selecting a group
422420
-----------------

doc/source/timeseries.rst

+18
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,24 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
703703
704704
ts2[[0, 2, 6]].index
705705
706+
.. _timeseries.iterating-label:
707+
708+
Iterating through groups
709+
------------------------
710+
711+
With the ``Resampler`` object in hand, iterating through the grouped data is very
712+
natural and functions similarly to :py:func:`itertools.groupby`:
713+
714+
.. ipython:: python
715+
716+
resampled = df.resample('H')
717+
718+
for name, group in resampled:
719+
print(name)
720+
print(group)
721+
722+
See :ref:`groupby.iterating-label`.
723+
706724
.. _timeseries.components:
707725

708726
Time/Date Components

doc/source/whatsnew/v0.24.0.txt

+19-6
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Pandas has gained the ability to hold integer dtypes with missing values. This l
4242
Here is an example of the usage.
4343

4444
We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
45-
marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`)
45+
marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`)
4646

4747
.. ipython:: python
4848

@@ -182,6 +182,8 @@ Other Enhancements
182182
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
183183
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
184184
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
185+
- :class:`Resampler` is now iterable like :class:`GroupBy` (:issue:`15314`).
186+
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` method (:issue:`15023`).
185187

186188
.. _whatsnew_0240.api_breaking:
187189

@@ -442,11 +444,13 @@ ExtensionType Changes
442444
- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
443445
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
444446
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
447+
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
445448
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
446449
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
450+
- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`)
447451
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
448452
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
449-
-
453+
- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
450454

451455
.. _whatsnew_0240.api.incompatibilities:
452456

@@ -517,8 +521,10 @@ Removal of prior version deprecations/changes
517521
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
518522

519523
- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`)
524+
- :meth:`Series.repeat` has renamed the ``reps`` argument to ``repeats`` (:issue:`14645`)
520525
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
521526
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
527+
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
522528
-
523529

524530
.. _whatsnew_0240.performance:
@@ -576,13 +582,17 @@ Datetimelike
576582
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
577583
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
578584
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
579-
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
580-
-
581585

582586
Timedelta
583587
^^^^^^^^^
584588

585589
- Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`)
590+
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
591+
- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`)
592+
- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
593+
- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
594+
- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
595+
-
586596
-
587597
-
588598

@@ -626,6 +636,7 @@ Numeric
626636
a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`).
627637
- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
628638
- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`,:issue:`22163`)
639+
- Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`)
629640
-
630641

631642
Strings
@@ -654,6 +665,7 @@ Indexing
654665
- Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`)
655666
- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`)
656667
- Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`)
668+
- ``Float64Index.get_loc`` now raises ``KeyError`` when a boolean key is passed. (:issue:`19087`)
657669

658670
Missing
659671
^^^^^^^
@@ -674,6 +686,7 @@ I/O
674686

675687
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
676688
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
689+
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
677690
-
678691

679692
Plotting
@@ -691,7 +704,7 @@ Groupby/Resample/Rolling
691704
``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`).
692705
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a
693706
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
694-
-
707+
- Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
695708

696709
Sparse
697710
^^^^^^
@@ -712,7 +725,7 @@ Reshaping
712725
- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`)
713726
- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
714727
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
715-
-
728+
- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
716729

717730
Build Changes
718731
^^^^^^^^^^^^^

pandas/_libs/algos.pxd

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
from util cimport numeric
2-
from numpy cimport float64_t, double_t
2+
33

44
cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil
55

6+
67
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
7-
cdef numeric t
8+
cdef:
9+
numeric t
810

911
# cython doesn't allow pointer dereference so use array syntax
1012
t = a[0]
1113
a[0] = b[0]
1214
b[0] = t
1315
return 0
1416

17+
1518
cdef enum TiebreakEnumType:
1619
TIEBREAK_AVERAGE
1720
TIEBREAK_MIN,

pandas/_libs/algos.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ tiebreakers = {
4545
}
4646

4747

48-
cdef inline are_diff(object left, object right):
48+
cdef inline bint are_diff(object left, object right):
4949
try:
5050
return fabs(left - right) > FP_ERR
5151
except TypeError:

pandas/_libs/algos_common_helper.pxi.in

+33-26
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,12 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):
6868

6969
@cython.boundscheck(False)
7070
@cython.wraparound(False)
71-
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
72-
limit=None):
73-
cdef Py_ssize_t i, j, nleft, nright
74-
cdef ndarray[int64_t, ndim=1] indexer
75-
cdef {{c_type}} cur, next
76-
cdef int lim, fill_count = 0
71+
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
72+
cdef:
73+
Py_ssize_t i, j, nleft, nright
74+
ndarray[int64_t, ndim=1] indexer
75+
{{c_type}} cur, next
76+
int lim, fill_count = 0
7777

7878
nleft = len(old)
7979
nright = len(new)
@@ -135,9 +135,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
135135
def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
136136
ndarray[uint8_t, cast=True] mask,
137137
limit=None):
138-
cdef Py_ssize_t i, N
139-
cdef {{c_type}} val
140-
cdef int lim, fill_count = 0
138+
cdef:
139+
Py_ssize_t i, N
140+
{{c_type}} val
141+
int lim, fill_count = 0
141142

142143
N = len(values)
143144

@@ -171,9 +172,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
171172
def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
172173
ndarray[uint8_t, ndim=2] mask,
173174
limit=None):
174-
cdef Py_ssize_t i, j, N, K
175-
cdef {{c_type}} val
176-
cdef int lim, fill_count = 0
175+
cdef:
176+
Py_ssize_t i, j, N, K
177+
{{c_type}} val
178+
int lim, fill_count = 0
177179

178180
K, N = (<object> values).shape
179181

@@ -233,10 +235,11 @@ D
233235
@cython.wraparound(False)
234236
def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
235237
limit=None):
236-
cdef Py_ssize_t i, j, nleft, nright
237-
cdef ndarray[int64_t, ndim=1] indexer
238-
cdef {{c_type}} cur, prev
239-
cdef int lim, fill_count = 0
238+
cdef:
239+
Py_ssize_t i, j, nleft, nright
240+
ndarray[int64_t, ndim=1] indexer
241+
{{c_type}} cur, prev
242+
int lim, fill_count = 0
240243

241244
nleft = len(old)
242245
nright = len(new)
@@ -299,9 +302,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
299302
def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
300303
ndarray[uint8_t, cast=True] mask,
301304
limit=None):
302-
cdef Py_ssize_t i, N
303-
cdef {{c_type}} val
304-
cdef int lim, fill_count = 0
305+
cdef:
306+
Py_ssize_t i, N
307+
{{c_type}} val
308+
int lim, fill_count = 0
305309

306310
N = len(values)
307311

@@ -335,9 +339,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
335339
def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
336340
ndarray[uint8_t, ndim=2] mask,
337341
limit=None):
338-
cdef Py_ssize_t i, j, N, K
339-
cdef {{c_type}} val
340-
cdef int lim, fill_count = 0
342+
cdef:
343+
Py_ssize_t i, j, N, K
344+
{{c_type}} val
345+
int lim, fill_count = 0
341346

342347
K, N = (<object> values).shape
343348

@@ -428,10 +433,10 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
428433
@cython.wraparound(False)
429434
@cython.boundscheck(False)
430435
def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
431-
cdef Py_ssize_t length = index.shape[0]
432-
cdef Py_ssize_t i = 0
433-
434-
cdef ndarray[object] result = np.empty(length, dtype=np.object_)
436+
cdef:
437+
Py_ssize_t length = index.shape[0]
438+
Py_ssize_t i = 0
439+
ndarray[object] result = np.empty(length, dtype=np.object_)
435440

436441
from pandas._libs.lib import maybe_convert_objects
437442

@@ -535,6 +540,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
535540

536541
cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num
537542

543+
538544
cpdef ensure_platform_int(object arr):
539545
# GH3033, GH1392
540546
# platform int is the size of the int pointer, e.g. np.intp
@@ -546,6 +552,7 @@ cpdef ensure_platform_int(object arr):
546552
else:
547553
return np.array(arr, dtype=np.intp)
548554

555+
549556
cpdef ensure_object(object arr):
550557
if util.is_array(arr):
551558
if (<ndarray> arr).descr.type_num == NPY_OBJECT:

0 commit comments

Comments
 (0)