Skip to content

Commit 60aaaed

Browse files
committed
Updated examples 21 June 2018
2 parents 773cfbf + 506935c commit 60aaaed

File tree

187 files changed

+6401
-2266
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

187 files changed

+6401
-2266
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ doc:
2323
cd doc; \
2424
python make.py clean; \
2525
python make.py html
26+
python make.py spellcheck

asv_bench/benchmarks/categoricals.py

+17
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,20 @@ def time_categorical_series_is_monotonic_increasing(self):
193193

194194
def time_categorical_series_is_monotonic_decreasing(self):
195195
self.s.is_monotonic_decreasing
196+
197+
198+
class Contains(object):
199+
200+
goal_time = 0.2
201+
202+
def setup(self):
203+
N = 10**5
204+
self.ci = tm.makeCategoricalIndex(N)
205+
self.c = self.ci.values
206+
self.key = self.ci.categories[0]
207+
208+
def time_categorical_index_contains(self):
209+
self.key in self.ci
210+
211+
def time_categorical_contains(self):
212+
self.key in self.c

asv_bench/benchmarks/frame_methods.py

+18
Original file line numberDiff line numberDiff line change
@@ -512,3 +512,21 @@ def time_nlargest(self, keep):
512512

513513
def time_nsmallest(self, keep):
514514
self.df.nsmallest(100, 'A', keep=keep)
515+
516+
517+
class Describe(object):
518+
519+
goal_time = 0.2
520+
521+
def setup(self):
522+
self.df = DataFrame({
523+
'a': np.random.randint(0, 100, int(1e6)),
524+
'b': np.random.randint(0, 100, int(1e6)),
525+
'c': np.random.randint(0, 100, int(1e6))
526+
})
527+
528+
def time_series_describe(self):
529+
self.df['a'].describe()
530+
531+
def time_dataframe_describe(self):
532+
self.df.describe()

asv_bench/benchmarks/groupby.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
8-
TimeGrouper, Categorical)
8+
TimeGrouper, Categorical, Timestamp)
99
import pandas.util.testing as tm
1010

1111
from .pandas_vb_common import setup # noqa
@@ -385,6 +385,25 @@ def time_dtype_as_field(self, dtype, method, application):
385385
self.as_field_method()
386386

387387

388+
class RankWithTies(object):
389+
# GH 21237
390+
goal_time = 0.2
391+
param_names = ['dtype', 'tie_method']
392+
params = [['float64', 'float32', 'int64', 'datetime64'],
393+
['first', 'average', 'dense', 'min', 'max']]
394+
395+
def setup(self, dtype, tie_method):
396+
N = 10**4
397+
if dtype == 'datetime64':
398+
data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype)
399+
else:
400+
data = np.array([1] * N, dtype=dtype)
401+
self.df = DataFrame({'values': data, 'key': ['foo'] * N})
402+
403+
def time_rank_ties(self, dtype, tie_method):
404+
self.df.groupby('key').rank(method=tie_method)
405+
406+
388407
class Float32(object):
389408
# GH 13335
390409
goal_time = 0.2

ci/environment-dev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ dependencies:
1313
- pytz
1414
- setuptools>=24.2.0
1515
- sphinx
16+
- sphinxcontrib-spelling

ci/requirements_dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ python-dateutil>=2.5.0
99
pytz
1010
setuptools>=24.2.0
1111
sphinx
12+
sphinxcontrib-spelling

doc/make.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,9 @@ def _sphinx_build(self, kind):
224224
--------
225225
>>> DocBuilder(num_jobs=4)._sphinx_build('html')
226226
"""
227-
if kind not in ('html', 'latex'):
228-
raise ValueError('kind must be html or latex, not {}'.format(kind))
227+
if kind not in ('html', 'latex', 'spelling'):
228+
raise ValueError('kind must be html, latex or '
229+
'spelling, not {}'.format(kind))
229230

230231
self._run_os('sphinx-build',
231232
'-j{}'.format(self.num_jobs),
@@ -304,6 +305,18 @@ def zip_html(self):
304305
'-q',
305306
*fnames)
306307

308+
def spellcheck(self):
309+
"""Spell check the documentation."""
310+
self._sphinx_build('spelling')
311+
output_location = os.path.join('build', 'spelling', 'output.txt')
312+
with open(output_location) as output:
313+
lines = output.readlines()
314+
if lines:
315+
raise SyntaxError(
316+
'Found misspelled words.'
317+
' Check pandas/doc/build/spelling/output.txt'
318+
' for more details.')
319+
307320

308321
def main():
309322
cmds = [method for method in dir(DocBuilder) if not method.startswith('_')]

doc/source/_static/favicon.ico

3.81 KB
Binary file not shown.

doc/source/advanced.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
342342
columns=micolumns).sort_index().sort_index(axis=1)
343343
dfmi
344344
345-
Basic multi-index slicing using slices, lists, and labels.
345+
Basic MultiIndex slicing using slices, lists, and labels.
346346

347347
.. ipython:: python
348348
@@ -1039,7 +1039,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
10391039
KeyError: 'Cannot get right slice bound for non-unique label: 3'
10401040
10411041
:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
1042-
an index is weakly monotonic. To check for strict montonicity, you can combine one of those with
1042+
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
10431043
:meth:`Index.is_unique`
10441044
10451045
.. ipython:: python

doc/source/api.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -1200,9 +1200,9 @@ Attributes and underlying data
12001200
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12011201
**Axes**
12021202

1203-
* **items**: axis 0; each item corresponds to a DataFrame contained inside
1204-
* **major_axis**: axis 1; the index (rows) of each of the DataFrames
1205-
* **minor_axis**: axis 2; the columns of each of the DataFrames
1203+
* **items**: axis 0; each item corresponds to a DataFrame contained inside
1204+
* **major_axis**: axis 1; the index (rows) of each of the DataFrames
1205+
* **minor_axis**: axis 2; the columns of each of the DataFrames
12061206

12071207
.. autosummary::
12081208
:toctree: generated/

doc/source/basics.rst

+29-30
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,8 @@ Attributes and the raw ndarray(s)
5050

5151
pandas objects have a number of attributes enabling you to access the metadata
5252

53-
* **shape**: gives the axis dimensions of the object, consistent with ndarray
54-
* Axis labels
55-
53+
* **shape**: gives the axis dimensions of the object, consistent with ndarray
54+
* Axis labels
5655
* **Series**: *index* (only axis)
5756
* **DataFrame**: *index* (rows) and *columns*
5857
* **Panel**: *items*, *major_axis*, and *minor_axis*
@@ -131,9 +130,9 @@ Flexible binary operations
131130
With binary operations between pandas data structures, there are two key points
132131
of interest:
133132

134-
* Broadcasting behavior between higher- (e.g. DataFrame) and
135-
lower-dimensional (e.g. Series) objects.
136-
* Missing data in computations.
133+
* Broadcasting behavior between higher- (e.g. DataFrame) and
134+
lower-dimensional (e.g. Series) objects.
135+
* Missing data in computations.
137136

138137
We will demonstrate how to manage these issues independently, though they can
139138
be handled simultaneously.
@@ -168,7 +167,7 @@ either match on the *index* or *columns* via the **axis** keyword:
168167
169168
df_orig = df
170169
171-
Furthermore you can align a level of a multi-indexed DataFrame with a Series.
170+
Furthermore, you can align a level of a MultiIndexed DataFrame with a Series.
172171

173172
.. ipython:: python
174173
@@ -462,10 +461,10 @@ produce an object of the same size. Generally speaking, these methods take an
462461
**axis** argument, just like *ndarray.{sum, std, ...}*, but the axis can be
463462
specified by name or integer:
464463

465-
- **Series**: no axis argument needed
466-
- **DataFrame**: "index" (axis=0, default), "columns" (axis=1)
467-
- **Panel**: "items" (axis=0), "major" (axis=1, default), "minor"
468-
(axis=2)
464+
* **Series**: no axis argument needed
465+
* **DataFrame**: "index" (axis=0, default), "columns" (axis=1)
466+
* **Panel**: "items" (axis=0), "major" (axis=1, default), "minor"
467+
(axis=2)
469468

470469
For example:
471470

@@ -593,7 +592,7 @@ categorical columns:
593592
frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)})
594593
frame.describe()
595594
596-
This behaviour can be controlled by providing a list of types as ``include``/``exclude``
595+
This behavior can be controlled by providing a list of types as ``include``/``exclude``
597596
arguments. The special value ``all`` can also be used:
598597

599598
.. ipython:: python
@@ -1034,7 +1033,7 @@ Passing a single function to ``.transform()`` with a ``Series`` will yield a sin
10341033
Transform with multiple functions
10351034
+++++++++++++++++++++++++++++++++
10361035

1037-
Passing multiple functions will yield a column multi-indexed DataFrame.
1036+
Passing multiple functions will yield a column MultiIndexed DataFrame.
10381037
The first level will be the original frame column names; the second level
10391038
will be the names of the transforming functions.
10401039

@@ -1060,7 +1059,7 @@ Passing a dict of functions will allow selective transforming per column.
10601059
10611060
tsdf.transform({'A': np.abs, 'B': lambda x: x+1})
10621061
1063-
Passing a dict of lists will generate a multi-indexed DataFrame with these
1062+
Passing a dict of lists will generate a MultiIndexed DataFrame with these
10641063
selective transforms.
10651064

10661065
.. ipython:: python
@@ -1187,11 +1186,11 @@ It is used to implement nearly all other features relying on label-alignment
11871186
functionality. To *reindex* means to conform the data to match a given set of
11881187
labels along a particular axis. This accomplishes several things:
11891188

1190-
* Reorders the existing data to match a new set of labels
1191-
* Inserts missing value (NA) markers in label locations where no data for
1192-
that label existed
1193-
* If specified, **fill** data for missing labels using logic (highly relevant
1194-
to working with time series data)
1189+
* Reorders the existing data to match a new set of labels
1190+
* Inserts missing value (NA) markers in label locations where no data for
1191+
that label existed
1192+
* If specified, **fill** data for missing labels using logic (highly relevant
1193+
to working with time series data)
11951194

11961195
Here is a simple example:
11971196

@@ -1889,12 +1888,12 @@ faster than sorting the entire Series and calling ``head(n)`` on the result.
18891888
df.nsmallest(5, ['a', 'c'])
18901889
18911890
1892-
.. _basics.multi-index_sorting:
1891+
.. _basics.multiindex_sorting:
18931892

1894-
Sorting by a multi-index column
1895-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1893+
Sorting by a MultiIndex column
1894+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18961895

1897-
You must be explicit about sorting when the column is a multi-index, and fully specify
1896+
You must be explicit about sorting when the column is a MultiIndex, and fully specify
18981897
all levels to ``by``.
18991898

19001899
.. ipython:: python
@@ -1911,10 +1910,10 @@ the axis indexes, since they are immutable) and returns a new object. Note that
19111910
**it is seldom necessary to copy objects**. For example, there are only a
19121911
handful of ways to alter a DataFrame *in-place*:
19131912

1914-
* Inserting, deleting, or modifying a column.
1915-
* Assigning to the ``index`` or ``columns`` attributes.
1916-
* For homogeneous data, directly modifying the values via the ``values``
1917-
attribute or advanced indexing.
1913+
* Inserting, deleting, or modifying a column.
1914+
* Assigning to the ``index`` or ``columns`` attributes.
1915+
* For homogeneous data, directly modifying the values via the ``values``
1916+
attribute or advanced indexing.
19181917

19191918
To be clear, no pandas method has the side effect of modifying your data;
19201919
almost every method returns a new object, leaving the original object
@@ -2112,22 +2111,22 @@ Because the data was transposed the original inference stored all columns as obj
21122111
The following functions are available for one dimensional object arrays or scalars to perform
21132112
hard conversion of objects to a specified type:
21142113

2115-
- :meth:`~pandas.to_numeric` (conversion to numeric dtypes)
2114+
* :meth:`~pandas.to_numeric` (conversion to numeric dtypes)
21162115

21172116
.. ipython:: python
21182117
21192118
m = ['1.1', 2, 3]
21202119
pd.to_numeric(m)
21212120
2122-
- :meth:`~pandas.to_datetime` (conversion to datetime objects)
2121+
* :meth:`~pandas.to_datetime` (conversion to datetime objects)
21232122

21242123
.. ipython:: python
21252124
21262125
import datetime
21272126
m = ['2016-07-09', datetime.datetime(2016, 3, 2)]
21282127
pd.to_datetime(m)
21292128
2130-
- :meth:`~pandas.to_timedelta` (conversion to timedelta objects)
2129+
* :meth:`~pandas.to_timedelta` (conversion to timedelta objects)
21312130

21322131
.. ipython:: python
21332132

doc/source/categorical.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -542,11 +542,11 @@ Comparisons
542542

543543
Comparing categorical data with other objects is possible in three cases:
544544

545-
* Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array,
546-
...) of the same length as the categorical data.
547-
* All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to
548-
another categorical Series, when ``ordered==True`` and the `categories` are the same.
549-
* All comparisons of a categorical data to a scalar.
545+
* Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array,
546+
...) of the same length as the categorical data.
547+
* All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to
548+
another categorical Series, when ``ordered==True`` and the `categories` are the same.
549+
* All comparisons of categorical data to a scalar.
550550

551551
All other comparisons, especially "non-equality" comparisons of two categoricals with different
552552
categories or a categorical with any list-like object, will raise a ``TypeError``.

doc/source/comparison_with_r.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ was started to provide a more detailed look at the `R language
1818
party libraries as they relate to ``pandas``. In comparisons with R and CRAN
1919
libraries, we care about the following things:
2020

21-
- **Functionality / flexibility**: what can/cannot be done with each tool
22-
- **Performance**: how fast are operations. Hard numbers/benchmarks are
23-
preferable
24-
- **Ease-of-use**: Is one tool easier/harder to use (you may have to be
25-
the judge of this, given side-by-side code comparisons)
21+
* **Functionality / flexibility**: what can/cannot be done with each tool
22+
* **Performance**: how fast are operations. Hard numbers/benchmarks are
23+
preferable
24+
* **Ease-of-use**: Is one tool easier/harder to use (you may have to be
25+
the judge of this, given side-by-side code comparisons)
2626

2727
This page is also here to offer a bit of a translation guide for users of these
2828
R packages.

0 commit comments

Comments
 (0)