
Commit e12bca7

Author: Carlos Souza (committed)
Commit message: Sync fork
Parents: 676a4e5 + 9d3554c


71 files changed, +1370 -1113 lines changed

.github/PULL_REQUEST_TEMPLATE.md

+1-1
@@ -1,4 +1,4 @@
 - [ ] closes #xxxx
 - [ ] tests added / passed
-- [ ] passes ``git diff upstream/master | flake8 --diff``
+- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff``
 - [ ] whatsnew entry

.travis.yml

+5-4
@@ -28,7 +28,11 @@ matrix:
     os: osx
     compiler: clang
     osx_image: xcode6.4
-    cache: ccache
+    cache:
+      ccache: true
+      directories:
+        - $HOME/.cache    # cython cache
+        - $HOME/.ccache   # compiler cache
     env:
     - PYTHON_VERSION=3.5
     - JOB_NAME: "35_osx"
@@ -175,13 +179,10 @@ matrix:
 before_install:
   - echo "before_install"
   - source ci/travis_process_gbq_encryption.sh
-  - echo $VIRTUAL_ENV
  - export PATH="$HOME/miniconda3/bin:$PATH"
  - df -h
-  - date
  - pwd
  - uname -a
-  - python -V
  - git --version
  - git tag
  - ci/before_install_travis.sh

asv_bench/benchmarks/reindex.py

+5
@@ -132,6 +132,9 @@ def setup(self):
         self.K = 10000
         self.key1 = np.random.randint(0, self.K, size=self.N)
         self.df_int = DataFrame({'key1': self.key1})
+        self.df_bool = DataFrame({i: np.random.randint(0, 2, size=self.K,
+                                                       dtype=bool)
+                                  for i in range(10)})

     def time_frame_drop_dups(self):
         self.df.drop_duplicates(['key1', 'key2'])
@@ -154,6 +157,8 @@ def time_series_drop_dups_string(self):
     def time_frame_drop_dups_int(self):
         self.df_int.drop_duplicates()

+    def time_frame_drop_dups_bool(self):
+        self.df_bool.drop_duplicates()

 #----------------------------------------------------------------------
 # blog "pandas escaped the zoo"
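For readers skimming the diff, here is a minimal standalone sketch of what the new time_frame_drop_dups_bool benchmark exercises; the sizes and column count mirror the setup above, and names outside the diff (K, df_bool, deduped) are illustrative only.

import numpy as np
from pandas import DataFrame

# Mirror the benchmark setup: 10 boolean columns of 10000 rows each
K = 10000
df_bool = DataFrame({i: np.random.randint(0, 2, size=K, dtype=bool)
                     for i in range(10)})

# The timed operation: de-duplicating rows made up entirely of bool columns,
# which this commit's whatsnew entry notes as faster in 0.20.0 (GH12963)
deduped = df_bool.drop_duplicates()
print(len(deduped))  # at most 2**10 = 1024 distinct rows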

ci/install_travis.sh

+29-21
@@ -1,18 +1,6 @@
 #!/bin/bash

-# There are 2 distinct pieces that get zipped and cached
-# - The venv site-packages dir including the installed dependencies
-# - The pandas build artifacts, using the build cache support via
-#   scripts/use_build_cache.py
-#
-# if the user opted in to use the cache and we're on a whitelisted fork
-# - if the server doesn't hold a cached version of venv/pandas build,
-#   do things the slow way, and put the results on the cache server
-#   for the next time.
-# - if the cache files are available, instal some necessaries via apt
-#   (no compiling needed), then directly goto script and collect 200$.
-#
-
+# edit the locale file if needed
 function edit_init()
 {
     if [ -n "$LOCALE_OVERRIDE" ]; then
@@ -26,20 +14,18 @@ function edit_init()
     fi
 }

+echo
 echo "[install_travis]"
 edit_init

 home_dir=$(pwd)
-echo "[home_dir: $home_dir]"
-
-if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
-    echo "[install ccache]"
-    time brew install ccache
-fi
+echo
+echo "[home_dir]: $home_dir"

 # install miniconda
 MINICONDA_DIR="$HOME/miniconda3"

+echo
 echo "[Using clean Miniconda install]"

 if [ -d "$MINICONDA_DIR" ]; then
@@ -54,14 +40,17 @@ else
 fi
 time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1

+echo
 echo "[show conda]"
 which conda

+echo
 echo "[update conda]"
 conda config --set ssl_verify false || exit 1
 conda config --set always_yes true --set changeps1 false || exit 1
 conda update -q conda

+echo
 echo "[add channels]"
 # add the pandas channel to take priority
 # to add extra packages
@@ -78,18 +67,28 @@ fi
 conda info -a || exit 1

 # set the compiler cache to work
+echo
 if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then
     echo "[Using ccache]"
     export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH
     gcc=$(which gcc)
-    echo "[gcc: $gcc]"
+    echo "[gcc]: $gcc"
     ccache=$(which ccache)
-    echo "[ccache: $ccache]"
+    echo "[ccache]: $ccache"
     export CC='ccache gcc'
+elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then
+    echo "[Using ccache]"
+    time brew install ccache
+    export PATH=/usr/local/opt/ccache/libexec:$PATH
+    gcc=$(which gcc)
+    echo "[gcc]: $gcc"
+    ccache=$(which ccache)
+    echo "[ccache]: $ccache"
 else
     echo "[Not using ccache]"
 fi

+echo
 echo "[create env]"

 # may have installation instructions for this build
@@ -103,13 +102,15 @@ else
 fi

 # build deps
+echo
 echo "[build installs]"
 REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build"
 if [ -e ${REQ} ]; then
     time conda install -n pandas --file=${REQ} || exit 1
 fi

 # may have addtl installation instructions for this build
+echo
 echo "[build addtl installs]"
 REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build.sh"
 if [ -e ${REQ} ]; then
@@ -129,6 +130,7 @@ if [ "$COVERAGE" ]; then
     pip install coverage pytest-cov
 fi

+echo
 if [ "$BUILD_TEST" ]; then

     # build & install testing
@@ -148,20 +150,23 @@ else
 fi

 # we may have run installations
+echo
 echo "[conda installs]"
 REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run"
 if [ -e ${REQ} ]; then
     time conda install -n pandas --file=${REQ} || exit 1
 fi

 # we may have additional pip installs
+echo
 echo "[pip installs]"
 REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip"
 if [ -e ${REQ} ]; then
     pip install -r $REQ
 fi

 # may have addtl installation instructions for this build
+echo
 echo "[addtl installs]"
 REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh"
 if [ -e ${REQ} ]; then
@@ -173,14 +178,17 @@ if [ -z "$BUILD_TEST" ]; then

     # remove any installed pandas package
     # w/o removing anything else
+    echo
     echo "[removing installed pandas]"
     conda remove pandas --force

     # install our pandas
+    echo
     echo "[running setup.py develop]"
     python setup.py develop || exit 1

 fi

+echo
 echo "[done]"
 exit 0

codecov.yml

+3-1
@@ -1,3 +1,6 @@
+codecov:
+  branch: master
+
 coverage:
   status:
     project:
@@ -6,4 +9,3 @@ coverage:
     patch:
       default:
         target: '50'
-        branches: null

doc/source/contributing.rst

+1-1
@@ -518,7 +518,7 @@ Travis-CI will run the `flake8 <http://pypi.python.org/pypi/flake8>`_ tool
 and report any stylistic errors in your code. Therefore, it is helpful before
 submitting code to run the check yourself on the diff::

-    git diff master | flake8 --diff
+    git diff master --name-only -- '*.py' | flake8 --diff

 This command will catch any stylistic errors in your changes specifically, but
 be beware it may not catch all of them. For example, if you delete the only

doc/source/text.rst

-12
@@ -385,18 +385,6 @@ or match a pattern:
 The distinction between ``match`` and ``contains`` is strictness: ``match``
 relies on strict ``re.match``, while ``contains`` relies on ``re.search``.

-.. warning::
-
-   In previous versions, ``match`` was for *extracting* groups,
-   returning a not-so-convenient Series of tuples. The new method ``extract``
-   (described in the previous section) is now preferred.
-
-   This old, deprecated behavior of ``match`` is still the default. As
-   demonstrated above, use the new behavior by setting ``as_indexer=True``.
-   In this mode, ``match`` is analogous to ``contains``, returning a boolean
-   Series. The new behavior will become the default behavior in a future
-   release.
-
 Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take
 an extra ``na`` argument so missing values can be considered True or False:

doc/source/whatsnew/v0.20.0.txt

+46-1
@@ -291,6 +291,7 @@ Other enhancements
 - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
 - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`)
 - The ``skiprows`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`10882`)
+- The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`)
 - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`)
 - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
 - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
@@ -470,6 +471,38 @@ New Behavior:

     s.map(lambda x: x.hour)

+
+.. _whatsnew_0200.api_breaking.index_dt_field:
+
+Accessing datetime fields of Index now return Index
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The datetime-related attributes (see :ref:`here <timeseries.components>`
+for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously
+returned numpy arrays. They will now return a new ``Index`` object, except
+in the case of a boolean field, where the result will stil be a boolean ndarray. (:issue:`15022`)
+
+Previous behaviour:
+
+.. code-block:: ipython
+
+    In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H')
+
+    In [2]: idx.hour
+    Out[2]: array([ 0, 10, 20,  6, 16], dtype=int32)
+
+New Behavior:
+
+.. ipython:: python
+
+    idx = pd.date_range("2015-01-01", periods=5, freq='10H')
+    idx.hour
+
+This has the advantage that specific ``Index`` methods are still available on the
+result. On the other hand, this might have backward incompatibilities: e.g.
+compared to numpy arrays, ``Index`` objects are not mutable. To get the original
+ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``.
+
 .. _whatsnew_0200.api_breaking.s3:

 S3 File Handling
@@ -728,6 +761,12 @@ Other API Changes
 - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`)
 - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`)
 - ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`)
+- The default behaviour of ``Series.str.match`` has changed from extracting
+  groups to matching the pattern. The extracting behaviour was deprecated
+  since pandas version 0.13.0 and can be done with the ``Series.str.extract``
+  method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is
+  ignored (no longer needed to specify the new behaviour) and is deprecated.
+

 .. _whatsnew_0200.deprecations:

@@ -744,6 +783,7 @@ Deprecations
 - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`)
 - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`)
 - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`)
+- The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`).
 - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`)
   * ``pd.pnow()``, replaced by ``Period.now()``
   * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore
@@ -789,6 +829,7 @@ Performance Improvements
 - Improved performance of ``.rank()`` for categorical data (:issue:`15498`)
 - Improved performance when using ``.unstack()`` (:issue:`15503`)
 - Improved performance of merge/join on ``category`` columns (:issue:`10409`)
+- Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)


 .. _whatsnew_0200.bug_fixes:
@@ -813,6 +854,7 @@ Bug Fixes
 - Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`)
 - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`)
 - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`)
+- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`)
 - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
 - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`)
 - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`)
@@ -823,9 +865,10 @@ Bug Fixes
 - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`)
 - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`)

+- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`)

 - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
-- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`)
+- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`)

 - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)

@@ -916,6 +959,8 @@ Bug Fixes
 - Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`)

 - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`)
+- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`)
+
 - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
 - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`)

pandas/_libs/algos.pxd

+13
@@ -0,0 +1,13 @@
+from util cimport numeric
+from numpy cimport float64_t, double_t
+
+cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil
+
+cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
+    cdef numeric t
+
+    # cython doesn't allow pointer dereference so use array syntax
+    t = a[0]
+    a[0] = b[0]
+    b[0] = t
+    return 0
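The ``kth_smallest`` declaration above is a nogil Cython selection routine. As a rough analogue only (not the pandas internal, and assuming 0-indexed selection), the same order statistic can be computed with ``np.partition``:

import numpy as np

def kth_smallest_analogue(values, k):
    """Return the k-th smallest element (0-indexed) of a 1-D array.

    Illustrative only; pandas' kth_smallest is an in-place Cython
    selection that uses swap() rather than np.partition.
    """
    arr = np.asarray(values)
    # np.partition places the k-th order statistic at position k
    return np.partition(arr, k)[k]

print(kth_smallest_analogue([7, 2, 9, 4, 1], 2))  # -> 4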
