Skip to content

Commit 9fc617b

Browse files
author
Carlos Souza
committed
Merge remote-tracking branch 'upstream/master'
2 parents e12bca7 + d2f32a0 commit 9fc617b

21 files changed

+332
-302
lines changed

.travis.yml

+27-105
Original file line numberDiff line numberDiff line change
@@ -27,154 +27,71 @@ matrix:
2727
- language: objective-c
2828
os: osx
2929
compiler: clang
30-
osx_image: xcode6.4
3130
cache:
3231
ccache: true
3332
directories:
3433
- $HOME/.cache # cython cache
3534
- $HOME/.ccache # compiler cache
3635
env:
37-
- PYTHON_VERSION=3.5
38-
- JOB_NAME: "35_osx"
39-
- TEST_ARGS="--skip-slow --skip-network"
40-
- JOB_TAG=_OSX
41-
- TRAVIS_PYTHON_VERSION=3.5
42-
- CACHE_NAME="35_osx"
43-
- USE_CACHE=true
36+
- PYTHON_VERSION=3.5 JOB_NAME="35_osx" TEST_ARGS="--skip-slow --skip-network" JOB_TAG="_OSX" TRAVIS_PYTHON_VERSION=3.5 USE_CACHE=true
4437
- python: 2.7
4538
env:
46-
- PYTHON_VERSION=2.7
47-
- JOB_NAME: "27_slow_nnet_LOCALE"
48-
- TEST_ARGS="--only-slow --skip-network"
49-
- LOCALE_OVERRIDE="zh_CN.UTF-8"
50-
- FULL_DEPS=true
51-
- JOB_TAG=_LOCALE
52-
- CACHE_NAME="27_slow_nnet_LOCALE"
53-
- USE_CACHE=true
39+
- PYTHON_VERSION=2.7 JOB_NAME="27_slow_nnet_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" JOB_TAG="_LOCALE" USE_CACHE=true
5440
addons:
5541
apt:
5642
packages:
5743
- language-pack-zh-hans
5844
- python: 2.7
5945
env:
60-
- PYTHON_VERSION=2.7
61-
- JOB_NAME: "27_nslow"
62-
- TEST_ARGS="--skip-slow"
63-
- FULL_DEPS=true
64-
- CLIPBOARD_GUI=gtk2
65-
- LINT=true
66-
- CACHE_NAME="27_nslow"
67-
- USE_CACHE=true
46+
- PYTHON_VERSION=2.7 JOB_NAME="27_nslow" TEST_ARGS="--skip-slow" LINT=true USE_CACHE=true
6847
addons:
6948
apt:
7049
packages:
7150
- python-gtk2
7251
- python: 3.5
7352
env:
74-
- PYTHON_VERSION=3.5
75-
- JOB_NAME: "35_nslow"
76-
- TEST_ARGS="--skip-slow --skip-network"
77-
- FULL_DEPS=true
78-
- CLIPBOARD=xsel
79-
- COVERAGE=true
80-
- CACHE_NAME="35_nslow"
81-
- USE_CACHE=true
53+
- PYTHON_VERSION=3.5 JOB_NAME="35_nslow" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true USE_CACHE=true
8254
addons:
8355
apt:
8456
packages:
8557
- xsel
8658
- python: 3.6
8759
env:
88-
- PYTHON_VERSION=3.6
89-
- JOB_NAME: "36"
90-
- TEST_ARGS="--skip-slow --skip-network"
91-
- PANDAS_TESTING_MODE="deprecate"
92-
- CONDA_FORGE=true
93-
- USE_CACHE=true
60+
- PYTHON_VERSION=3.6 JOB_NAME="36" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true USE_CACHE=true
9461
addons:
9562
apt:
9663
packages:
9764
- libatlas-base-dev
9865
- gfortran
99-
# In allow_failures
66+
# In allow_failures
10067
- python: 2.7
10168
env:
102-
- PYTHON_VERSION=2.7
103-
- JOB_NAME: "27_slow"
104-
- JOB_TAG=_SLOW
105-
- TEST_ARGS="--only-slow --skip-network"
106-
- FULL_DEPS=true
107-
- CACHE_NAME="27_slow"
108-
- USE_CACHE=true
109-
# In allow_failures
69+
- PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true
70+
# In allow_failures
11071
- python: 2.7
11172
env:
112-
- PYTHON_VERSION=2.7
113-
- JOB_NAME: "27_build_test"
114-
- JOB_TAG=_BUILD_TEST
115-
- TEST_ARGS="--skip-slow"
116-
- FULL_DEPS=true
117-
- BUILD_TEST=true
118-
- CACHE_NAME="27_build_test"
119-
- USE_CACHE=true
120-
# In allow_failures
73+
- PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true
74+
# In allow_failures
12175
- python: 3.5
12276
env:
123-
- PYTHON_VERSION=3.5
124-
- JOB_NAME: "35_numpy_dev"
125-
- JOB_TAG=_NUMPY_DEV
126-
- TEST_ARGS="--skip-slow --skip-network"
127-
- PANDAS_TESTING_MODE="deprecate"
128-
- CACHE_NAME="35_numpy_dev"
129-
- USE_CACHE=true
130-
# In allow_failures
77+
- PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true
78+
# In allow_failures
13179
- python: 3.5
13280
env:
133-
- PYTHON_VERSION=3.5
134-
- JOB_NAME: "doc_build"
135-
- FULL_DEPS=true
136-
- DOC_BUILD=true
137-
- JOB_TAG=_DOC_BUILD
138-
- CACHE_NAME="doc_build"
139-
- USE_CACHE=true
81+
- PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true
14082
allow_failures:
14183
- python: 2.7
14284
env:
143-
- PYTHON_VERSION=2.7
144-
- JOB_NAME: "27_slow"
145-
- JOB_TAG=_SLOW
146-
- TEST_ARGS="--only-slow --skip-network"
147-
- FULL_DEPS=true
148-
- CACHE_NAME="27_slow"
149-
- USE_CACHE=true
85+
- PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true
15086
- python: 2.7
15187
env:
152-
- PYTHON_VERSION=2.7
153-
- JOB_NAME: "27_build_test"
154-
- JOB_TAG=_BUILD_TEST
155-
- TEST_ARGS="--skip-slow"
156-
- FULL_DEPS=true
157-
- BUILD_TEST=true
158-
- CACHE_NAME="27_build_test"
159-
- USE_CACHE=true
88+
- PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true
16089
- python: 3.5
16190
env:
162-
- PYTHON_VERSION=3.5
163-
- JOB_NAME: "35_numpy_dev"
164-
- JOB_TAG=_NUMPY_DEV
165-
- TEST_ARGS="--skip-slow --skip-network"
166-
- PANDAS_TESTING_MODE="deprecate"
167-
- CACHE_NAME="35_numpy_dev"
168-
- USE_CACHE=true
91+
- PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true
16992
- python: 3.5
17093
env:
171-
- PYTHON_VERSION=3.5
172-
- JOB_NAME: "doc_build"
173-
- FULL_DEPS=true
174-
- DOC_BUILD=true
175-
- JOB_TAG=_DOC_BUILD
176-
- CACHE_NAME="doc_build"
177-
- USE_CACHE=true
94+
- PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true
17895

17996
before_install:
18097
- echo "before_install"
@@ -186,7 +103,7 @@ before_install:
186103
- git --version
187104
- git tag
188105
- ci/before_install_travis.sh
189-
- export DISPLAY=:99.0
106+
- export DISPLAY=":99.0"
190107

191108
install:
192109
- echo "install start"
@@ -209,12 +126,17 @@ script:
209126
- echo "script done"
210127

211128
after_success:
212-
- source activate pandas && codecov
129+
- if [ "$COVERAGE" ]; then
130+
source activate pandas && codecov --file /tmp/cov-single.xml /tmp/cov-multiple.xml;
131+
fi
213132

214133
after_script:
215134
- echo "after_script start"
216-
- ci/install_test.sh
217135
- source activate pandas && python -c "import pandas; pandas.show_versions();"
218-
- ci/print_skipped.py /tmp/single.xml
219-
- ci/print_skipped.py /tmp/multiple.xml
136+
- if [ -e /tmp/single.xml ]; then
137+
ci/print_skipped.py /tmp/single.xml;
138+
fi
139+
- if [ -e /tmp/multiple.xml ]; then
140+
ci/print_skipped.py /tmp/multiple.xml;
141+
fi
220142
- echo "after_script done"

asv_bench/benchmarks/groupby.py

+25-7
Original file line numberDiff line numberDiff line change
@@ -108,16 +108,34 @@ def setup(self):
108108
self.N = 10000
109109
self.labels = np.random.randint(0, 2000, size=self.N)
110110
self.labels2 = np.random.randint(0, 3, size=self.N)
111-
self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), })
112-
113-
def f(self, g):
111+
self.df = DataFrame({
112+
'key': self.labels,
113+
'key2': self.labels2,
114+
'value1': np.random.randn(self.N),
115+
'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N // 4)),
116+
})
117+
118+
@staticmethod
119+
def scalar_function(g):
114120
return 1
115121

116-
def time_groupby_frame_apply(self):
117-
self.df.groupby(['key', 'key2']).apply(self.f)
122+
def time_groupby_frame_apply_scalar_function(self):
123+
self.df.groupby(['key', 'key2']).apply(self.scalar_function)
124+
125+
def time_groupby_frame_apply_scalar_function_overhead(self):
126+
self.df.groupby('key').apply(self.scalar_function)
127+
128+
@staticmethod
129+
def df_copy_function(g):
130+
# ensure that the group name is available (see GH #15062)
131+
g.name
132+
return g.copy()
133+
134+
def time_groupby_frame_df_copy_function(self):
135+
self.df.groupby(['key', 'key2']).apply(self.df_copy_function)
118136

119-
def time_groupby_frame_apply_overhead(self):
120-
self.df.groupby('key').apply(self.f)
137+
def time_groupby_frame_apply_df_copy_overhead(self):
138+
self.df.groupby('key').apply(self.df_copy_function)
121139

122140

123141
#----------------------------------------------------------------------

asv_bench/benchmarks/pandas_vb_common.py

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
except:
2626
pass
2727

28+
try:
29+
Panel = Panel
30+
except Exception:
31+
Panel = WidePanel
32+
2833
# didn't add to namespace until later
2934
try:
3035
from pandas.core.index import MultiIndex

bench/bench_join_panel.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ def reindex_on_axis(panels, axis, axis_reindex):
4545
return p
4646

4747

48-
# Does the job but inefficient. It is better to handle
49-
# this like you read a table in pytables.
48+
# does the job but inefficient (better to handle like you read a table in
49+
# pytables...e.g create a LongPanel then convert to Wide)
5050
def create_panels_join(cls, panels):
5151
""" given an array of panels's, create a single panel """
5252
panels = [a for a in panels if a is not None]

ci/script_multi.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ if [ "$BUILD_TEST" ]; then
2727
cd /tmp
2828
python -c "import pandas; pandas.test(['-n 2'])"
2929
elif [ "$COVERAGE" ]; then
30-
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
31-
pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
30+
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
31+
pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
3232
else
3333
echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
3434
pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest

ci/script_single.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ fi
2020
if [ "$BUILD_TEST" ]; then
2121
echo "We are not running pytest as this is simply a build test."
2222
elif [ "$COVERAGE" ]; then
23-
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
24-
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
23+
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
24+
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
2525
else
2626
echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas
2727
pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest

doc/source/groupby.rst

+33-5
Original file line numberDiff line numberDiff line change
@@ -580,9 +580,21 @@ Transformation
580580
--------------
581581

582582
The ``transform`` method returns an object that is indexed the same (same size)
583-
as the one being grouped. Thus, the passed transform function should return a
584-
result that is the same size as the group chunk. For example, suppose we wished
585-
to standardize the data within each group:
583+
as the one being grouped. The transform function must:
584+
585+
* Return a result that is either the same size as the group chunk or
586+
broadcastable to the size of the group chunk (e.g., a scalar,
587+
``grouped.transform(lambda x: x.iloc[-1])``).
588+
* Operate column-by-column on the group chunk. The transform is applied to
589+
the first group chunk using ``chunk.apply``.
590+
* Not perform in-place operations on the group chunk. Group chunks should
591+
be treated as immutable, and changes to a group chunk may produce unexpected
592+
results. For example, when using ``fillna``, ``inplace`` must be ``False``
593+
(``grouped.transform(lambda x: x.fillna(inplace=False))``).
594+
* (Optionally) operate on the entire group chunk. If this is supported, a
595+
fast path is used starting from the *second* chunk.
596+
597+
For example, suppose we wished to standardize the data within each group:
586598

587599
.. ipython:: python
588600
@@ -620,6 +632,21 @@ We can also visually compare the original and transformed data sets.
620632
@savefig groupby_transform_plot.png
621633
compare.plot()
622634
635+
Transformation functions that have lower dimension outputs are broadcast to
636+
match the shape of the input array.
637+
638+
.. ipython:: python
639+
640+
data_range = lambda x: x.max() - x.min()
641+
ts.groupby(key).transform(data_range)
642+
643+
Alternatively, the built-in methods could be used to produce the same
644+
outputs:
645+
646+
.. ipython:: python
647+
648+
ts.groupby(key).transform('max') - ts.groupby(key).transform('min')
649+
623650
Another common data transform is to replace missing data with the group mean.
624651

625652
.. ipython:: python
@@ -664,8 +691,9 @@ and that the transformed data contains no NAs.
664691
665692
.. note::
666693

667-
Some functions when applied to a groupby object will automatically transform the input, returning
668-
an object of the same shape as the original. Passing ``as_index=False`` will not affect these transformation methods.
694+
Some functions when applied to a groupby object will automatically transform
695+
the input, returning an object of the same shape as the original. Passing
696+
``as_index=False`` will not affect these transformation methods.
669697

670698
For example: ``fillna, ffill, bfill, shift``.
671699

doc/source/whatsnew/v0.20.0.txt

+6-1
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,7 @@ Deprecations
785785
- ``Series/DataFrame/Panel.consolidate()`` has been deprecated as a public method. (:issue:`15483`)
786786
- The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`).
787787
- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`)
788+
788789
* ``pd.pnow()``, replaced by ``Period.now()``
789790
* ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore
790791
* ``pd.Expr``, is removed, as it is not applicable to user code.
@@ -812,7 +813,7 @@ Removal of prior version deprecations/changes
812813
- The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`)
813814
- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`)
814815
- ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`)
815-
- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`)
816+
- Where clauses in ``pytables`` are only accepted as strings and expression types, and not as other data types (:issue:`12027`)
816817

817818
.. _whatsnew_0200.performance:
818819

@@ -830,6 +831,9 @@ Performance Improvements
830831
- Improved performance when using ``.unstack()`` (:issue:`15503`)
831832
- Improved performance of merge/join on ``category`` columns (:issue:`10409`)
832833
- Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)
834+
- Improved performance of ``pd.core.groupby.GroupBy.apply`` when the applied
835+
function uses the ``.name`` attribute of the group DataFrame (:issue:`15062`).
836+
833837

834838

835839
.. _whatsnew_0200.bug_fixes:
@@ -864,6 +868,7 @@ Bug Fixes
864868
- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`)
865869
- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`)
866870
- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`)
871+
- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`)
867872

868873
- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`)
869874

pandas/_libs/src/reduce.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ def apply_frame_axis0(object frame, object f, object names,
497497
# Need to infer if our low-level mucking is going to cause a segfault
498498
if n > 0:
499499
chunk = frame.iloc[starts[0]:ends[0]]
500-
shape_before = chunk.shape
500+
object.__setattr__(chunk, 'name', names[0])
501501
try:
502502
result = f(chunk)
503503
if result is chunk:

0 commit comments

Comments
 (0)