Skip to content

Commit 99204a4

Browse files
committed
Merge remote-tracking branch 'upstream/master' into cat
2 parents 57480bd + 4c54dd2 commit 99204a4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1542
-840
lines changed

asv_bench/benchmarks/frame_methods.py

+2
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ def time_dict_rename_both_axes(self):
9696

9797

9898
class Iteration:
99+
# mem_itertuples_* benchmarks are slow
100+
timeout = 120
99101

100102
def setup(self):
101103
N = 1000

asv_bench/benchmarks/groupby.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from functools import partial
22
from itertools import product
33
from string import ascii_letters
4-
import warnings
54

65
import numpy as np
76

87
from pandas import (
9-
Categorical, DataFrame, MultiIndex, Series, TimeGrouper, Timestamp,
8+
Categorical, DataFrame, MultiIndex, Series, Timestamp,
109
date_range, period_range)
1110
import pandas.util.testing as tm
1211

@@ -301,10 +300,6 @@ def setup(self):
301300
def time_multi_size(self):
302301
self.df.groupby(['key1', 'key2']).size()
303302

304-
def time_dt_timegrouper_size(self):
305-
with warnings.catch_warnings(record=True):
306-
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
307-
308303
def time_category_size(self):
309304
self.draws.groupby(self.cats).size()
310305

asv_bench/benchmarks/io/parsers.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import numpy as np
22

3-
from pandas._libs.tslibs.parsing import (
4-
_concat_date_cols, _does_string_look_like_datetime)
3+
try:
4+
from pandas._libs.tslibs.parsing import (
5+
_concat_date_cols, _does_string_look_like_datetime)
6+
except ImportError:
7+
# Avoid whole benchmark suite import failure on asv (currently 0.4)
8+
pass
59

610

711
class DoesStringLookLikeDatetime(object):

azure-pipelines.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,11 @@ jobs:
9797
- script: |
9898
export PATH=$HOME/miniconda3/bin:$PATH
9999
source activate pandas-dev
100+
cd asv_bench
101+
asv check -E existing
100102
git remote add upstream https://github.com/pandas-dev/pandas.git
101103
git fetch upstream
102104
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
103-
cd asv_bench
104105
asv machine --yes
105106
ASV_OUTPUT="$(asv dev)"
106107
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then

ci/setup_env.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,12 @@ echo "conda list"
118118
conda list
119119

120120
# Install DB for Linux
121-
if [ ${TRAVIS_OS_NAME} == "linux" ]; then
121+
if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
122122
echo "installing dbs"
123123
mysql -e 'create database pandas_nosetest;'
124124
psql -c 'create database pandas_nosetest;' -U postgres
125125
else
126-
echo "not using dbs on non-linux"
126+
echo "not using dbs on non-linux Travis builds or Azure Pipelines"
127127
fi
128128

129129
echo "done"

doc/source/user_guide/groupby.rst

+61-13
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,67 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
568568
'mean': 'bar',
569569
'std': 'baz'}))
570570
571+
.. _groupby.aggregate.named:
572+
573+
Named Aggregation
574+
~~~~~~~~~~~~~~~~~
575+
576+
.. versionadded:: 0.25.0
577+
578+
To support column-specific aggregation *with control over the output column names*, pandas
579+
accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where
580+
581+
- The keywords are the *output* column names
582+
- The values are tuples whose first element is the column to select
583+
and the second element is the aggregation to apply to that column. Pandas
584+
provides the ``pandas.NamedAgg`` namedtuple with the fields ``['column', 'aggfunc']``
585+
to make it clearer what the arguments are. As usual, the aggregation can
586+
be a callable or a string alias.
587+
588+
.. ipython:: python
589+
590+
animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'],
591+
'height': [9.1, 6.0, 9.5, 34.0],
592+
'weight': [7.9, 7.5, 9.9, 198.0]})
593+
animals
594+
595+
animals.groupby("kind").agg(
596+
min_height=pd.NamedAgg(column='height', aggfunc='min'),
597+
max_height=pd.NamedAgg(column='height', aggfunc='max'),
598+
average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
599+
)
600+
601+
602+
``pandas.NamedAgg`` is just a ``namedtuple``. Plain tuples are allowed as well.
603+
604+
.. ipython:: python
605+
606+
animals.groupby("kind").agg(
607+
min_height=('height', 'min'),
608+
max_height=('height', 'max'),
609+
average_weight=('weight', np.mean),
610+
)
611+
612+
613+
If your desired output column names are not valid Python identifiers, construct a dictionary
614+
and unpack the keyword arguments
615+
616+
.. ipython:: python
617+
618+
animals.groupby("kind").agg(**{
619+
'total weight': pd.NamedAgg(column='weight', aggfunc=sum),
620+
})
621+
622+
Additional keyword arguments are not passed through to the aggregation functions. Only pairs
623+
of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation functions
624+
require additional arguments, partially apply them with :func:`functools.partial`.
625+
626+
.. note::
627+
628+
For Python 3.5 and earlier, the order of ``**kwargs`` in a function was not
629+
preserved. This means that the output column ordering would not be
630+
consistent. To ensure consistent ordering, the keys (and so output columns)
631+
will always be sorted for Python 3.5.
571632

572633
Applying different functions to DataFrame columns
573634
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -588,19 +649,6 @@ must be either implemented on GroupBy or available via :ref:`dispatching
588649
589650
grouped.agg({'C': 'sum', 'D': 'std'})
590651
591-
.. note::
592-
593-
If you pass a dict to ``aggregate``, the ordering of the output columns is
594-
non-deterministic. If you want to be sure the output columns will be in a specific
595-
order, you can use an ``OrderedDict``. Compare the output of the following two commands:
596-
597-
.. ipython:: python
598-
599-
from collections import OrderedDict
600-
601-
grouped.agg({'D': 'std', 'C': 'mean'})
602-
grouped.agg(OrderedDict([('D', 'std'), ('C', 'mean')]))
603-
604652
.. _groupby.aggregate.cython:
605653

606654
Cython-optimized aggregation functions

0 commit comments

Comments
 (0)