Skip to content

Commit 64b4bd0

Browse files
committed
Merge remote-tracking branch 'upstream/master' into set_accessor
2 parents b46611f + 5fbb683 commit 64b4bd0

File tree

165 files changed

+5271
-1461
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

165 files changed

+5271
-1461
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ doc:
2323
cd doc; \
2424
python make.py clean; \
2525
python make.py html
26+
python make.py spellcheck

asv_bench/benchmarks/categoricals.py

+13
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,16 @@ def time_categorical_series_is_monotonic_increasing(self):
193193

194194
def time_categorical_series_is_monotonic_decreasing(self):
195195
self.s.is_monotonic_decreasing
196+
197+
198+
class Contains(object):
199+
200+
goal_time = 0.2
201+
202+
def setup(self):
203+
N = 10**5
204+
self.ci = tm.makeCategoricalIndex(N)
205+
self.cat = self.ci.categories[0]
206+
207+
def time_contains(self):
208+
self.cat in self.ci

asv_bench/benchmarks/groupby.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
8-
TimeGrouper, Categorical)
8+
TimeGrouper, Categorical, Timestamp)
99
import pandas.util.testing as tm
1010

1111
from .pandas_vb_common import setup # noqa
@@ -385,6 +385,25 @@ def time_dtype_as_field(self, dtype, method, application):
385385
self.as_field_method()
386386

387387

388+
class RankWithTies(object):
389+
# GH 21237
390+
goal_time = 0.2
391+
param_names = ['dtype', 'tie_method']
392+
params = [['float64', 'float32', 'int64', 'datetime64'],
393+
['first', 'average', 'dense', 'min', 'max']]
394+
395+
def setup(self, dtype, tie_method):
396+
N = 10**4
397+
if dtype == 'datetime64':
398+
data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype)
399+
else:
400+
data = np.array([1] * N, dtype=dtype)
401+
self.df = DataFrame({'values': data, 'key': ['foo'] * N})
402+
403+
def time_rank_ties(self, dtype, tie_method):
404+
self.df.groupby('key').rank(method=tie_method)
405+
406+
388407
class Float32(object):
389408
# GH 13335
390409
goal_time = 0.2

ci/environment-dev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ dependencies:
1313
- pytz
1414
- setuptools>=24.2.0
1515
- sphinx
16+
- sphinxcontrib-spelling

ci/requirements_dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ python-dateutil>=2.5.0
99
pytz
1010
setuptools>=24.2.0
1111
sphinx
12+
sphinxcontrib-spelling

doc/make.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,9 @@ def _sphinx_build(self, kind):
224224
--------
225225
>>> DocBuilder(num_jobs=4)._sphinx_build('html')
226226
"""
227-
if kind not in ('html', 'latex'):
228-
raise ValueError('kind must be html or latex, not {}'.format(kind))
227+
if kind not in ('html', 'latex', 'spelling'):
228+
raise ValueError('kind must be html, latex or '
229+
'spelling, not {}'.format(kind))
229230

230231
self._run_os('sphinx-build',
231232
'-j{}'.format(self.num_jobs),
@@ -304,6 +305,18 @@ def zip_html(self):
304305
'-q',
305306
*fnames)
306307

308+
def spellcheck(self):
309+
"""Spell check the documentation."""
310+
self._sphinx_build('spelling')
311+
output_location = os.path.join('build', 'spelling', 'output.txt')
312+
with open(output_location) as output:
313+
lines = output.readlines()
314+
if lines:
315+
raise SyntaxError(
316+
'Found misspelled words.'
317+
' Check pandas/doc/build/spelling/output.txt'
318+
' for more details.')
319+
307320

308321
def main():
309322
cmds = [method for method in dir(DocBuilder) if not method.startswith('_')]

doc/source/_static/favicon.ico

3.81 KB
Binary file not shown.

doc/source/advanced.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
342342
columns=micolumns).sort_index().sort_index(axis=1)
343343
dfmi
344344
345-
Basic multi-index slicing using slices, lists, and labels.
345+
Basic MultiIndex slicing using slices, lists, and labels.
346346

347347
.. ipython:: python
348348
@@ -1039,7 +1039,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
10391039
KeyError: 'Cannot get right slice bound for non-unique label: 3'
10401040
10411041
:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
1042-
an index is weakly monotonic. To check for strict montonicity, you can combine one of those with
1042+
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
10431043
:meth:`Index.is_unique`
10441044
10451045
.. ipython:: python

doc/source/basics.rst

+8-8
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ either match on the *index* or *columns* via the **axis** keyword:
168168
169169
df_orig = df
170170
171-
Furthermore you can align a level of a multi-indexed DataFrame with a Series.
171+
Furthermore you can align a level of a MultiIndexed DataFrame with a Series.
172172

173173
.. ipython:: python
174174
@@ -593,7 +593,7 @@ categorical columns:
593593
frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)})
594594
frame.describe()
595595
596-
This behaviour can be controlled by providing a list of types as ``include``/``exclude``
596+
This behavior can be controlled by providing a list of types as ``include``/``exclude``
597597
arguments. The special value ``all`` can also be used:
598598

599599
.. ipython:: python
@@ -1034,7 +1034,7 @@ Passing a single function to ``.transform()`` with a ``Series`` will yield a sin
10341034
Transform with multiple functions
10351035
+++++++++++++++++++++++++++++++++
10361036

1037-
Passing multiple functions will yield a column multi-indexed DataFrame.
1037+
Passing multiple functions will yield a column MultiIndexed DataFrame.
10381038
The first level will be the original frame column names; the second level
10391039
will be the names of the transforming functions.
10401040

@@ -1060,7 +1060,7 @@ Passing a dict of functions will allow selective transforming per column.
10601060
10611061
tsdf.transform({'A': np.abs, 'B': lambda x: x+1})
10621062
1063-
Passing a dict of lists will generate a multi-indexed DataFrame with these
1063+
Passing a dict of lists will generate a MultiIndexed DataFrame with these
10641064
selective transforms.
10651065

10661066
.. ipython:: python
@@ -1889,12 +1889,12 @@ faster than sorting the entire Series and calling ``head(n)`` on the result.
18891889
df.nsmallest(5, ['a', 'c'])
18901890
18911891
1892-
.. _basics.multi-index_sorting:
1892+
.. _basics.multiindex_sorting:
18931893

1894-
Sorting by a multi-index column
1895-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1894+
Sorting by a MultiIndex column
1895+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18961896

1897-
You must be explicit about sorting when the column is a multi-index, and fully specify
1897+
You must be explicit about sorting when the column is a MultiIndex, and fully specify
18981898
all levels to ``by``.
18991899

19001900
.. ipython:: python

doc/source/conf.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
import re
1717
import inspect
1818
import importlib
19-
from sphinx.ext.autosummary import _import_by_name
19+
import logging
2020
import warnings
21+
from sphinx.ext.autosummary import _import_by_name
2122

23+
logger = logging.getLogger(__name__)
2224

2325
try:
2426
raw_input # Python 2
@@ -75,8 +77,19 @@
7577
'nbsphinx',
7678
]
7779

80+
try:
81+
import sphinxcontrib.spelling # noqa
82+
except ImportError as err:
83+
logger.warn(('sphinxcontrib.spelling failed to import with error "{}". '
84+
'`spellcheck` command is not available.'.format(err)))
85+
else:
86+
extensions.append('sphinxcontrib.spelling')
87+
7888
exclude_patterns = ['**.ipynb_checkpoints']
7989

90+
spelling_word_list_filename = ['spelling_wordlist.txt', 'names_wordlist.txt']
91+
spelling_ignore_pypi_package_names = True
92+
8093
with open("index.rst") as f:
8194
index_rst_lines = f.readlines()
8295

@@ -200,16 +213,16 @@
200213
# of the sidebar.
201214
# html_logo = None
202215

203-
# The name of an image file (within the static path) to use as favicon of the
204-
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
205-
# pixels large.
206-
# html_favicon = None
207-
208216
# Add any paths that contain custom static files (such as style sheets) here,
209217
# relative to this directory. They are copied after the builtin static files,
210218
# so a file named "default.css" will overwrite the builtin "default.css".
211219
html_static_path = ['_static']
212220

221+
# The name of an image file (within the static path) to use as favicon of the
222+
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
223+
# pixels large.
224+
html_favicon = os.path.join(html_static_path[0], 'favicon.ico')
225+
213226
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
214227
# using the given strftime format.
215228
# html_last_updated_fmt = '%b %d, %Y'

doc/source/contributing.rst

+19
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,25 @@ the documentation are also built by Travis-CI. These docs are then hosted `here
436436
<http://pandas-docs.github.io/pandas-docs-travis>`__, see also
437437
the :ref:`Continuous Integration <contributing.ci>` section.
438438

439+
Spell checking documentation
440+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
441+
442+
When contributing documentation to **pandas**, it's good to check whether your work
443+
contains any spelling errors. Sphinx provides an easy way to spell check documentation
444+
and docstrings.
445+
446+
Running the spell check is easy. Just navigate to your local ``pandas/doc/`` directory and run::
447+
448+
python make.py spellcheck
449+
450+
The spellcheck will take a few minutes to run (between 1 and 6 minutes). Sphinx will alert you
451+
with warnings and misspelled words - these misspelled words will be added to a file called
452+
``output.txt`` and you can find it in your local directory ``pandas/doc/build/spelling/``.
453+
454+
The Sphinx spelling extension uses an EN-US dictionary to correct words, what means that in
455+
some cases you might need to add a word to this dictionary. You can do so by adding the word to
456+
the bag-of-words file named ``spelling_wordlist.txt`` located in the folder ``pandas/doc/``.
457+
439458
.. _contributing.code:
440459

441460
Contributing to the code base

doc/source/contributing_docstring.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ left before or after the docstring. The text starts in the next line after the
103103
opening quotes. The closing quotes have their own line
104104
(meaning that they are not at the end of the last sentence).
105105

106-
In rare occasions reST styles like bold text or itallics will be used in
106+
On rare occasions reST styles like bold text or italics will be used in
107107
docstrings, but it is common to have inline code, which is presented between
108108
backticks. It is considered inline code:
109109

@@ -243,7 +243,7 @@ their use cases, if it is not too generic.
243243
"""
244244
Pivot a row index to columns.
245245
246-
When using a multi-index, a level can be pivoted so each value in
246+
When using a MultiIndex, a level can be pivoted so each value in
247247
the index becomes a column. This is especially useful when a subindex
248248
is repeated for the main index, and data is easier to visualize as a
249249
pivot table.
@@ -706,7 +706,7 @@ than 5, to show the example with the default values. If doing the ``mean``, we
706706
could use something like ``[1, 2, 3]``, so it is easy to see that the value
707707
returned is the mean.
708708

709-
For more complex examples (groupping for example), avoid using data without
709+
For more complex examples (grouping for example), avoid using data without
710710
interpretation, like a matrix of random numbers with columns A, B, C, D...
711711
And instead use a meaningful example, which makes it easier to understand the
712712
concept. Unless required by the example, use names of animals, to keep examples
@@ -877,7 +877,7 @@ be tricky. Here are some attention points:
877877
the actual error only the error name is sufficient.
878878

879879
* If there is a small part of the result that can vary (e.g. a hash in an object
880-
represenation), you can use ``...`` to represent this part.
880+
representation), you can use ``...`` to represent this part.
881881

882882
If you want to show that ``s.plot()`` returns a matplotlib AxesSubplot object,
883883
this will fail the doctest ::

doc/source/cookbook.rst

+11-11
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ New Columns
286286
df = pd.DataFrame(
287287
{'AAA' : [1,1,1,2,2,2,3,3], 'BBB' : [2,1,3,4,5,1,2,3]}); df
288288
289-
Method 1 : idxmin() to get the index of the mins
289+
Method 1 : idxmin() to get the index of the minimums
290290

291291
.. ipython:: python
292292
@@ -307,7 +307,7 @@ MultiIndexing
307307

308308
The :ref:`MultiIndexing <advanced.hierarchical>` docs.
309309

310-
`Creating a multi-index from a labeled frame
310+
`Creating a MultiIndex from a labeled frame
311311
<http://stackoverflow.com/questions/14916358/reshaping-dataframes-in-pandas-based-on-column-labels>`__
312312

313313
.. ipython:: python
@@ -330,7 +330,7 @@ The :ref:`multindexing <advanced.hierarchical>` docs.
330330
Arithmetic
331331
**********
332332

333-
`Performing arithmetic with a multi-index that needs broadcasting
333+
`Performing arithmetic with a MultiIndex that needs broadcasting
334334
<http://stackoverflow.com/questions/19501510/divide-entire-pandas-multiindex-dataframe-by-dataframe-variable/19502176#19502176>`__
335335

336336
.. ipython:: python
@@ -342,7 +342,7 @@ Arithmetic
342342
Slicing
343343
*******
344344

345-
`Slicing a multi-index with xs
345+
`Slicing a MultiIndex with xs
346346
<http://stackoverflow.com/questions/12590131/how-to-slice-multindex-columns-in-pandas-dataframes>`__
347347

348348
.. ipython:: python
@@ -363,7 +363,7 @@ To take the cross section of the 1st level and 1st axis the index:
363363
364364
df.xs('six',level=1,axis=0)
365365
366-
`Slicing a multi-index with xs, method #2
366+
`Slicing a MultiIndex with xs, method #2
367367
<http://stackoverflow.com/questions/14964493/multiindex-based-indexing-in-pandas>`__
368368

369369
.. ipython:: python
@@ -386,13 +386,13 @@ To take the cross section of the 1st level and 1st axis the index:
386386
df.loc[(All,'Math'),('Exams')]
387387
df.loc[(All,'Math'),(All,'II')]
388388
389-
`Setting portions of a multi-index with xs
389+
`Setting portions of a MultiIndex with xs
390390
<http://stackoverflow.com/questions/19319432/pandas-selecting-a-lower-level-in-a-dataframe-to-do-a-ffill>`__
391391

392392
Sorting
393393
*******
394394

395-
`Sort by specific column or an ordered list of columns, with a multi-index
395+
`Sort by specific column or an ordered list of columns, with a MultiIndex
396396
<http://stackoverflow.com/questions/14733871/mutli-index-sorting-in-pandas>`__
397397

398398
.. ipython:: python
@@ -664,7 +664,7 @@ The :ref:`Pivot <reshaping.pivot>` docs.
664664
`Plot pandas DataFrame with year over year data
665665
<http://stackoverflow.com/questions/30379789/plot-pandas-data-frame-with-year-over-year-data>`__
666666

667-
To create year and month crosstabulation:
667+
To create year and month cross tabulation:
668668

669669
.. ipython:: python
670670
@@ -677,7 +677,7 @@ To create year and month crosstabulation:
677677
Apply
678678
*****
679679

680-
`Rolling Apply to Organize - Turning embedded lists into a multi-index frame
680+
`Rolling Apply to Organize - Turning embedded lists into a MultiIndex frame
681681
<http://stackoverflow.com/questions/17349981/converting-pandas-dataframe-with-categorical-values-into-binary-values>`__
682682

683683
.. ipython:: python
@@ -1029,8 +1029,8 @@ Skip row between header and data
10291029
01.01.1990 05:00;21;11;12;13
10301030
"""
10311031
1032-
Option 1: pass rows explicitly to skiprows
1033-
""""""""""""""""""""""""""""""""""""""""""
1032+
Option 1: pass rows explicitly to skip rows
1033+
"""""""""""""""""""""""""""""""""""""""""""
10341034

10351035
.. ipython:: python
10361036

0 commit comments

Comments
 (0)