Skip to content

Commit 5eb692b

Browse files
author
Matias Heikkilä
committed
Merge branch 'master' into validation-test-for-sorting
2 parents e40c5a9 + 8d124ea commit 5eb692b

File tree

134 files changed

+2943
-1844
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

134 files changed

+2943
-1844
lines changed

asv_bench/benchmarks/frame_methods.py

+2
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ def time_dict_rename_both_axes(self):
9696

9797

9898
class Iteration:
99+
# mem_itertuples_* benchmarks are slow
100+
timeout = 120
99101

100102
def setup(self):
101103
N = 1000

asv_bench/benchmarks/groupby.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from functools import partial
22
from itertools import product
33
from string import ascii_letters
4-
import warnings
54

65
import numpy as np
76

87
from pandas import (
9-
Categorical, DataFrame, MultiIndex, Series, TimeGrouper, Timestamp,
8+
Categorical, DataFrame, MultiIndex, Series, Timestamp,
109
date_range, period_range)
1110
import pandas.util.testing as tm
1211

@@ -301,10 +300,6 @@ def setup(self):
301300
def time_multi_size(self):
302301
self.df.groupby(['key1', 'key2']).size()
303302

304-
def time_dt_timegrouper_size(self):
305-
with warnings.catch_warnings(record=True):
306-
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
307-
308303
def time_category_size(self):
309304
self.draws.groupby(self.cats).size()
310305

asv_bench/benchmarks/index_object.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ def time_is_dates_only(self):
5252

5353
class Ops:
5454

55-
sample_time = 0.2
5655
params = ['float', 'int']
5756
param_names = ['dtype']
5857

@@ -95,6 +94,12 @@ def time_min(self):
9594
def time_min_trivial(self):
9695
self.idx_inc.min()
9796

97+
def time_get_loc_inc(self):
98+
self.idx_inc.get_loc(900000)
99+
100+
def time_get_loc_dec(self):
101+
self.idx_dec.get_loc(100000)
102+
98103

99104
class IndexAppend:
100105

@@ -183,7 +188,7 @@ def time_get_loc(self):
183188

184189
class IntervalIndexMethod:
185190
# GH 24813
186-
params = [10**3, 10**5]
191+
params = [10**3, 10**5, 10**7]
187192

188193
def setup(self, N):
189194
left = np.append(np.arange(N), np.array(0))
@@ -194,5 +199,8 @@ def setup(self, N):
194199
def time_monotonic_inc(self, N):
195200
self.intv.is_monotonic_increasing
196201

202+
def time_is_unique(self, N):
203+
self.intv.is_unique
204+
197205

198206
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/io/parsers.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import numpy as np
22

3-
from pandas._libs.tslibs.parsing import (
4-
_concat_date_cols, _does_string_look_like_datetime)
3+
try:
4+
from pandas._libs.tslibs.parsing import (
5+
_concat_date_cols, _does_string_look_like_datetime)
6+
except ImportError:
7+
# Avoid whole benchmark suite import failure on asv (currently 0.4)
8+
pass
59

610

711
class DoesStringLookLikeDatetime(object):

asv_bench/benchmarks/rolling.py

-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
class Methods:
66

7-
sample_time = 0.2
87
params = (['DataFrame', 'Series'],
98
[10, 1000],
109
['int', 'float'],
@@ -23,7 +22,6 @@ def time_rolling(self, constructor, window, dtype, method):
2322

2423
class ExpandingMethods:
2524

26-
sample_time = 0.2
2725
params = (['DataFrame', 'Series'],
2826
['int', 'float'],
2927
['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
@@ -41,7 +39,6 @@ def time_expanding(self, constructor, dtype, method):
4139

4240
class EWMMethods:
4341

44-
sample_time = 0.2
4542
params = (['DataFrame', 'Series'],
4643
[10, 1000],
4744
['int', 'float'],
@@ -58,7 +55,6 @@ def time_ewm(self, constructor, window, dtype, method):
5855

5956

6057
class VariableWindowMethods(Methods):
61-
sample_time = 0.2
6258
params = (['DataFrame', 'Series'],
6359
['50s', '1h', '1d'],
6460
['int', 'float'],
@@ -75,7 +71,6 @@ def setup(self, constructor, window, dtype, method):
7571

7672
class Pairwise:
7773

78-
sample_time = 0.2
7974
params = ([10, 1000, None],
8075
['corr', 'cov'],
8176
[True, False])
@@ -95,7 +90,6 @@ def time_pairwise(self, window, method, pairwise):
9590

9691

9792
class Quantile:
98-
sample_time = 0.2
9993
params = (['DataFrame', 'Series'],
10094
[10, 1000],
10195
['int', 'float'],

azure-pipelines.yml

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
name: Windows
1616
vmImage: vs2017-win2016
1717

18-
- job: 'Checks_and_doc'
18+
- job: 'Checks'
1919
pool:
2020
vmImage: ubuntu-16.04
2121
timeoutInMinutes: 90
@@ -97,10 +97,11 @@ jobs:
9797
- script: |
9898
export PATH=$HOME/miniconda3/bin:$PATH
9999
source activate pandas-dev
100+
cd asv_bench
101+
asv check -E existing
100102
git remote add upstream https://github.com/pandas-dev/pandas.git
101103
git fetch upstream
102104
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
103-
cd asv_bench
104105
asv machine --yes
105106
ASV_OUTPUT="$(asv dev)"
106107
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then

ci/deps/azure-35-compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,5 @@ dependencies:
2626
- pip
2727
- pip:
2828
# for python 3.5, pytest>=4.0.2 is not available in conda
29-
- pytest>=4.0.2
29+
- pytest==4.5.0
3030
- html5lib==1.0b2

ci/deps/azure-macos-35.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies:
2525
- pip:
2626
- python-dateutil==2.5.3
2727
# universal
28-
- pytest>=4.0.2
28+
- pytest==4.5.0
2929
- pytest-xdist
3030
- pytest-mock
3131
- hypothesis>=3.58.0

ci/setup_env.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,12 @@ echo "conda list"
118118
conda list
119119

120120
# Install DB for Linux
121-
if [ ${TRAVIS_OS_NAME} == "linux" ]; then
121+
if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
122122
echo "installing dbs"
123123
mysql -e 'create database pandas_nosetest;'
124124
psql -c 'create database pandas_nosetest;' -U postgres
125125
else
126-
echo "not using dbs on non-linux"
126+
echo "not using dbs on non-linux Travis builds or Azure Pipelines"
127127
fi
128128

129129
echo "done"

doc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@
319319
pd.options.display.max_rows = 15
320320
321321
import os
322-
os.chdir('{}')
322+
os.chdir(r'{}')
323323
""".format(os.path.dirname(os.path.dirname(__file__)))
324324

325325

doc/source/install.rst

-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,6 @@ Optional Dependencies
281281
`qtpy <https://github.com/spyder-ide/qtpy>`__ (requires PyQt or PySide),
282282
`PyQt5 <https://www.riverbankcomputing.com/software/pyqt/download5>`__,
283283
`PyQt4 <http://www.riverbankcomputing.com/software/pyqt/download>`__,
284-
`pygtk <http://www.pygtk.org/>`__,
285284
`xsel <http://www.vergenet.net/~conrad/software/xsel/>`__, or
286285
`xclip <https://github.com/astrand/xclip/>`__: necessary to use
287286
:func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.

doc/source/reference/frame.rst

-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ Conversion
4848
:toctree: api/
4949

5050
DataFrame.astype
51-
DataFrame.convert_objects
5251
DataFrame.infer_objects
5352
DataFrame.copy
5453
DataFrame.isna

doc/source/reference/indexing.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
.. _api.indexing:
44

5-
========
6-
Indexing
7-
========
5+
=============
6+
Index Objects
7+
=============
88

99
Index
1010
-----

doc/source/reference/series.rst

-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ Conversion
5656

5757
Series.astype
5858
Series.infer_objects
59-
Series.convert_objects
6059
Series.copy
6160
Series.bool
6261
Series.to_numpy

doc/source/user_guide/groupby.rst

+61-13
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,67 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
568568
'mean': 'bar',
569569
'std': 'baz'}))
570570
571+
.. _groupby.aggregate.named:
572+
573+
Named Aggregation
574+
~~~~~~~~~~~~~~~~~
575+
576+
.. versionadded:: 0.25.0
577+
578+
To support column-specific aggregation *with control over the output column names*, pandas
579+
accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where
580+
581+
- The keywords are the *output* column names
582+
- The values are tuples whose first element is the column to select
583+
and the second element is the aggregation to apply to that column. Pandas
584+
provides the ``pandas.NamedAgg`` namedtuple with the fields ``['column', 'aggfunc']``
585+
to make it clearer what the arguments are. As usual, the aggregation can
586+
be a callable or a string alias.
587+
588+
.. ipython:: python
589+
590+
animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'],
591+
'height': [9.1, 6.0, 9.5, 34.0],
592+
'weight': [7.9, 7.5, 9.9, 198.0]})
593+
animals
594+
595+
animals.groupby("kind").agg(
596+
min_height=pd.NamedAgg(column='height', aggfunc='min'),
597+
max_height=pd.NamedAgg(column='height', aggfunc='max'),
598+
average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
599+
)
600+
601+
602+
``pandas.NamedAgg`` is just a ``namedtuple``. Plain tuples are allowed as well.
603+
604+
.. ipython:: python
605+
606+
animals.groupby("kind").agg(
607+
min_height=('height', 'min'),
608+
max_height=('height', 'max'),
609+
average_weight=('weight', np.mean),
610+
)
611+
612+
613+
If your desired output column names are not valid python keywords, construct a dictionary
614+
and unpack the keyword arguments
615+
616+
.. ipython:: python
617+
618+
animals.groupby("kind").agg(**{
619+
'total weight': pd.NamedAgg(column='weight', aggfunc=sum),
620+
})
621+
622+
Additional keyword arguments are not passed through to the aggregation functions. Only pairs
623+
of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation function
624+
requires additional arguments, partially apply them with :meth:`functools.partial`.
625+
626+
.. note::
627+
628+
For Python 3.5 and earlier, the order of ``**kwargs`` in a function was not
629+
preserved. This means that the output column ordering would not be
630+
consistent. To ensure consistent ordering, the keys (and so output columns)
631+
will always be sorted for Python 3.5.
571632

572633
Applying different functions to DataFrame columns
573634
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -588,19 +649,6 @@ must be either implemented on GroupBy or available via :ref:`dispatching
588649
589650
grouped.agg({'C': 'sum', 'D': 'std'})
590651
591-
.. note::
592-
593-
If you pass a dict to ``aggregate``, the ordering of the output columns is
594-
non-deterministic. If you want to be sure the output columns will be in a specific
595-
order, you can use an ``OrderedDict``. Compare the output of the following two commands:
596-
597-
.. ipython:: python
598-
599-
from collections import OrderedDict
600-
601-
grouped.agg({'D': 'std', 'C': 'mean'})
602-
grouped.agg(OrderedDict([('D', 'std'), ('C', 'mean')]))
603-
604652
.. _groupby.aggregate.cython:
605653

606654
Cython-optimized aggregation functions

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -3272,7 +3272,7 @@ We can see that we got the same content back, which we had earlier written to th
32723272

32733273
.. note::
32743274

3275-
You may need to install xclip or xsel (with gtk, PyQt5, PyQt4 or qtpy) on Linux to use these methods.
3275+
You may need to install xclip or xsel (with PyQt5, PyQt4 or qtpy) on Linux to use these methods.
32763276

32773277
.. _io.pickle:
32783278

0 commit comments

Comments
 (0)