Skip to content

Commit bda85e2

Browse files
committed
Merge remote-tracking branch 'upstream/master' into pandas-dev#22150
# Conflicts: # doc/source/whatsnew/v0.24.0.txt
2 parents 827178e + 1c500fb commit bda85e2

File tree

156 files changed

+3316
-2393
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

156 files changed

+3316
-2393
lines changed

.coveragerc

-29
This file was deleted.

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ build: clean_pyc
1313
python setup.py build_ext --inplace
1414

1515
lint-diff:
16-
git diff master --name-only -- "*.py" | grep "pandas" | xargs flake8
16+
git diff master --name-only -- "*.py" | grep -E "pandas|scripts" | xargs flake8
1717

1818
develop: build
1919
-python setup.py develop

asv_bench/benchmarks/strings.py

+27-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import warnings
22

33
import numpy as np
4-
from pandas import Series
4+
from pandas import Series, DataFrame
55
import pandas.util.testing as tm
66

77

@@ -12,9 +12,6 @@ class Methods(object):
1212
def setup(self):
1313
self.s = Series(tm.makeStringIndex(10**5))
1414

15-
def time_cat(self):
16-
self.s.str.cat(sep=',')
17-
1815
def time_center(self):
1916
self.s.str.center(100)
2017

@@ -87,6 +84,32 @@ def time_repeat(self, repeats):
8784
self.s.str.repeat(self.repeat)
8885

8986

87+
class Cat(object):
88+
89+
goal_time = 0.2
90+
params = ([0, 3], [None, ','], [None, '-'], [0.0, 0.001, 0.15])
91+
param_names = ['other_cols', 'sep', 'na_rep', 'na_frac']
92+
93+
def setup(self, other_cols, sep, na_rep, na_frac):
94+
N = 10 ** 5
95+
mask_gen = lambda: np.random.choice([True, False], N,
96+
p=[1 - na_frac, na_frac])
97+
self.s = Series(tm.makeStringIndex(N)).where(mask_gen())
98+
if other_cols == 0:
99+
# str.cat self-concatenates only for others=None
100+
self.others = None
101+
else:
102+
self.others = DataFrame({i: tm.makeStringIndex(N).where(mask_gen())
103+
for i in range(other_cols)})
104+
105+
def time_cat(self, other_cols, sep, na_rep, na_frac):
106+
# before the concatenation (one caller + other_cols columns), the total
107+
# expected fraction of rows containing any NaN is:
108+
# reduce(lambda t, _: t + (1 - t) * na_frac, range(other_cols + 1), 0)
109+
# for other_cols=3 and na_frac=0.15, this works out to ~48%
110+
self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
111+
112+
90113
class Contains(object):
91114

92115
goal_time = 0.2

ci/appveyor-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- matplotlib
1414
- numexpr
1515
- numpy=1.12*
16-
- openpyxl
16+
- openpyxl=2.5.5
1717
- pytables
1818
- python=2.7.*
1919
- pytz

ci/appveyor-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies:
1010
- matplotlib
1111
- numexpr
1212
- numpy=1.14*
13-
- openpyxl
13+
- openpyxl=2.5.5
1414
- pyarrow
1515
- pytables
1616
- python-dateutil

ci/circle-27-compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ dependencies:
88
- jinja2=2.8
99
- numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr
1010
- numpy=1.9.3
11-
- openpyxl
11+
- openpyxl=2.5.5
1212
- psycopg2
1313
- pytables=3.2.2
1414
- python-dateutil=2.5.0

ci/circle-36-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- nomkl
1414
- numexpr
1515
- numpy
16-
- openpyxl
16+
- openpyxl=2.5.5
1717
- psycopg2
1818
- pymysql
1919
- pytables

ci/circle-36-locale_slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dependencies:
1414
- nomkl
1515
- numexpr
1616
- numpy
17-
- openpyxl
17+
- openpyxl=2.5.5
1818
- psycopg2
1919
- pymysql
2020
- pytables

ci/lint.sh

+6-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ if [ "$LINT" ]; then
2424
if [ $? -ne "0" ]; then
2525
RET=1
2626
fi
27+
28+
flake8 scripts/tests --filename=*.py
29+
if [ $? -ne "0" ]; then
30+
RET=1
31+
fi
2732
echo "Linting *.py DONE"
2833

2934
echo "Linting setup.py"
@@ -175,7 +180,7 @@ if [ "$LINT" ]; then
175180
RET=1
176181
fi
177182
echo "Check for old-style classes DONE"
178-
183+
179184
echo "Check for backticks incorrectly rendering because of missing spaces"
180185
grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
181186

ci/requirements-optional-conda.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ lxml
1212
matplotlib
1313
nbsphinx
1414
numexpr
15-
openpyxl
15+
openpyxl=2.5.5
1616
pyarrow
1717
pymysql
1818
pytables

ci/requirements-optional-pip.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ lxml
1414
matplotlib
1515
nbsphinx
1616
numexpr
17-
openpyxl
17+
openpyxl==2.5.5
1818
pyarrow
1919
pymysql
2020
tables
@@ -28,4 +28,4 @@ statsmodels
2828
xarray
2929
xlrd
3030
xlsxwriter
31-
xlwt
31+
xlwt

ci/script_single.sh

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ elif [ "$COVERAGE" ]; then
2828
echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
2929
pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
3030

31+
echo pytest -s -r xXs --strict scripts
32+
pytest -s -r xXs --strict scripts
3133
else
3234
echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas
3335
pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest

ci/travis-35-osx.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- nomkl
1313
- numexpr
1414
- numpy=1.10.4
15-
- openpyxl
15+
- openpyxl=2.5.5
1616
- pytables
1717
- python=3.5*
1818
- pytz

ci/travis-36-doc.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ dependencies:
2222
- notebook
2323
- numexpr
2424
- numpy=1.13*
25-
- openpyxl
25+
- openpyxl=2.5.5
2626
- pandoc
2727
- pyqt
2828
- pytables

ci/travis-36-slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies:
1010
- matplotlib
1111
- numexpr
1212
- numpy
13-
- openpyxl
13+
- openpyxl=2.5.5
1414
- patsy
1515
- psycopg2
1616
- pymysql

ci/travis-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- nomkl
1919
- numexpr
2020
- numpy
21-
- openpyxl
21+
- openpyxl=2.5.5
2222
- psycopg2
2323
- pyarrow
2424
- pymysql

doc/source/advanced.rst

+27-26
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
MultiIndex / Advanced Indexing
1616
******************************
1717

18-
This section covers indexing with a ``MultiIndex`` and more advanced indexing features.
18+
This section covers indexing with a ``MultiIndex`` and :ref:`more advanced indexing features <indexing.index_types>`.
1919

2020
See the :ref:`Indexing and Selecting Data <indexing>` section for general indexing documentation.
2121

@@ -51,13 +51,13 @@ See the :ref:`cookbook<cookbook.multi_index>` for some advanced strategies.
5151
Creating a MultiIndex (hierarchical index) object
5252
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5353

54-
The ``MultiIndex`` object is the hierarchical analogue of the standard
55-
``Index`` object which typically stores the axis labels in pandas objects. You
54+
The :class:`MultiIndex` object is the hierarchical analogue of the standard
55+
:class:`Index` object which typically stores the axis labels in pandas objects. You
5656
can think of ``MultiIndex`` as an array of tuples where each tuple is unique. A
5757
``MultiIndex`` can be created from a list of arrays (using
58-
``MultiIndex.from_arrays``), an array of tuples (using
59-
``MultiIndex.from_tuples``), or a crossed set of iterables (using
60-
``MultiIndex.from_product``). The ``Index`` constructor will attempt to return
58+
:meth:`MultiIndex.from_arrays`), an array of tuples (using
59+
:meth:`MultiIndex.from_tuples`), or a crossed set of iterables (using
60+
:meth:`MultiIndex.from_product`). The ``Index`` constructor will attempt to return
6161
a ``MultiIndex`` when it is passed a list of tuples. The following examples
6262
demonstrate different ways to initialize MultiIndexes.
6363

@@ -76,15 +76,15 @@ demonstrate different ways to initialize MultiIndexes.
7676
s
7777
7878
When you want every pairing of the elements in two iterables, it can be easier
79-
to use the ``MultiIndex.from_product`` function:
79+
to use the :meth:`MultiIndex.from_product` method:
8080

8181
.. ipython:: python
8282
8383
iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
8484
pd.MultiIndex.from_product(iterables, names=['first', 'second'])
8585
8686
As a convenience, you can pass a list of arrays directly into Series or
87-
DataFrame to construct a MultiIndex automatically:
87+
DataFrame to construct a ``MultiIndex`` automatically:
8888

8989
.. ipython:: python
9090
@@ -140,7 +140,7 @@ may wish to generate your own ``MultiIndex`` when preparing the data set.
140140
Reconstructing the level labels
141141
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
142142

143-
The method ``get_level_values`` will return a vector of the labels for each
143+
The method :meth:`~MultiIndex.get_level_values` will return a vector of the labels for each
144144
location at a particular level:
145145

146146
.. ipython:: python
@@ -183,7 +183,7 @@ For example:
183183
184184
This is done to avoid a recomputation of the levels in order to make slicing
185185
highly performant. If you want to see only the used levels, you can use the
186-
:func:`MultiIndex.get_level_values` method.
186+
:meth:`~MultiIndex.get_level_values` method.
187187

188188
.. ipython:: python
189189
@@ -193,7 +193,7 @@ highly performant. If you want to see only the used levels, you can use the
193193
df[['foo','qux']].columns.get_level_values(0)
194194
195195
To reconstruct the ``MultiIndex`` with only the used levels, the
196-
``remove_unused_levels`` method may be used.
196+
:meth:`~MultiIndex.remove_unused_levels` method may be used.
197197

198198
.. versionadded:: 0.20.0
199199

@@ -400,8 +400,8 @@ You can use a right-hand-side of an alignable object as well.
400400
Cross-section
401401
~~~~~~~~~~~~~
402402

403-
The ``xs`` method of ``DataFrame`` additionally takes a level argument to make
404-
selecting data at a particular level of a MultiIndex easier.
403+
The :meth:`~DataFrame.xs` method of ``DataFrame`` additionally takes a level argument to make
404+
selecting data at a particular level of a ``MultiIndex`` easier.
405405

406406
.. ipython:: python
407407
@@ -519,7 +519,7 @@ to be sorted. As with any index, you can use ``sort_index``.
519519
520520
.. _advanced.sortlevel_byname:
521521

522-
You may also pass a level name to ``sort_index`` if the MultiIndex levels
522+
You may also pass a level name to ``sort_index`` if the ``MultiIndex`` levels
523523
are named.
524524

525525
.. ipython:: python
@@ -566,7 +566,8 @@ Furthermore, if you try to index something that is not fully lexsorted, this can
566566
In [5]: dfm.loc[(0,'y'):(1, 'z')]
567567
UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
568568
569-
The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and the ``lexsort_depth`` property returns the sort depth:
569+
The :meth:`~MultiIndex.is_lexsorted` method on a ``MultiIndex`` shows if the
570+
index is sorted, and the ``lexsort_depth`` property returns the sort depth:
570571

571572
.. ipython:: python
572573
@@ -591,8 +592,8 @@ Take Methods
591592

592593
.. _advanced.take:
593594

594-
Similar to NumPy ndarrays, pandas Index, Series, and DataFrame also provides
595-
the ``take`` method that retrieves elements along a given axis at the given
595+
Similar to NumPy ndarrays, pandas ``Index``, ``Series``, and ``DataFrame`` also provide
596+
the :meth:`~DataFrame.take` method that retrieves elements along a given axis at the given
596597
indices. The given indices must be either a list or an ndarray of integer
597598
index positions. ``take`` will also accept negative integers as relative positions to the end of the object.
598599

@@ -668,8 +669,8 @@ In the following sub-sections we will highlight some other index types.
668669
CategoricalIndex
669670
~~~~~~~~~~~~~~~~
670671

671-
``CategoricalIndex`` is a type of index that is useful for supporting
672-
indexing with duplicates. This is a container around a ``Categorical``
672+
:class:`CategoricalIndex` is a type of index that is useful for supporting
673+
indexing with duplicates. This is a container around a :class:`Categorical`
673674
and allows efficient indexing and storage of an index with a large number of duplicated elements.
674675

675676
.. ipython:: python
@@ -758,19 +759,19 @@ Int64Index and RangeIndex
758759
759760
Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here <whatsnew_0180.float_indexers>`.
760761
761-
``Int64Index`` is a fundamental basic index in pandas.
762-
This is an Immutable array implementing an ordered, sliceable set.
762+
:class:`Int64Index` is a fundamental basic index in pandas.
763+
This is an immutable array implementing an ordered, sliceable set.
763764
Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects.
764765
765-
``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
766+
:class:`RangeIndex` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
766767
``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__.
767768
768769
.. _indexing.float64index:
769770
770771
Float64Index
771772
~~~~~~~~~~~~
772773
773-
By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
774+
By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
774775
This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
775776
same.
776777
@@ -875,9 +876,9 @@ IntervalIndex
875876
876877
.. versionadded:: 0.20.0
877878
878-
:class:`IntervalIndex` together with its own dtype, ``interval`` as well as the
879-
:class:`Interval` scalar type, allow first-class support in pandas for interval
880-
notation.
879+
:class:`IntervalIndex` together with its own dtype, :class:`~pandas.api.types.IntervalDtype`
880+
as well as the :class:`Interval` scalar type, allow first-class support in pandas
881+
for interval notation.
881882
882883
The ``IntervalIndex`` allows some unique indexing and is also used as a
883884
return type for the categories in :func:`cut` and :func:`qcut`.

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -2559,6 +2559,7 @@ objects.
25592559
.. autosummary::
25602560
:toctree: generated/
25612561

2562+
api.extensions.register_extension_dtype
25622563
api.extensions.register_dataframe_accessor
25632564
api.extensions.register_series_accessor
25642565
api.extensions.register_index_accessor

0 commit comments

Comments
 (0)