Skip to content

DOC: Fix examples in documentation #31472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into the base branch from the contributor's branch on
Mar 7, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1451,41 +1451,49 @@ def factorize(self, sort=False, na_sentinel=-1):

Examples
--------
>>> x = pd.Series([1, 2, 3])
>>> x
>>> ser = pd.Series([1, 2, 3])
>>> ser
0 1
1 2
2 3
dtype: int64

>>> x.searchsorted(4)
>>> ser.searchsorted(4)
3

>>> x.searchsorted([0, 4])
>>> ser.searchsorted([0, 4])
array([0, 3])

>>> x.searchsorted([1, 3], side='left')
>>> ser.searchsorted([1, 3], side='left')
array([0, 2])

>>> x.searchsorted([1, 3], side='right')
>>> ser.searchsorted([1, 3], side='right')
array([1, 3])

>>> x = pd.Categorical(['apple', 'bread', 'bread',
'cheese', 'milk'], ordered=True)
>>> ser = pd.Categorical(
... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
... )
>>> ser
[apple, bread, bread, cheese, milk]
Categories (4, object): [apple < bread < cheese < milk]

>>> x.searchsorted('bread')
>>> ser.searchsorted('bread')
1

>>> x.searchsorted(['bread'], side='right')
>>> ser.searchsorted(['bread'], side='right')
array([3])

If the values are not monotonically sorted, wrong locations
may be returned:

>>> x = pd.Series([2, 1, 3])
>>> x.searchsorted(1)
>>> ser = pd.Series([2, 1, 3])
>>> ser
0 2
1 1
2 3
dtype: int64

>>> ser.searchsorted(1) # doctest: +SKIP
0 # wrong result, correct would be 1
"""

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

These should not depend on core.internals.
"""

from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast

import numpy as np
Expand Down Expand Up @@ -200,12 +201,12 @@ def array(

>>> pd.array([1, 2, np.nan])
<IntegerArray>
[1, 2, NaN]
[1, 2, <NA>]
Length: 3, dtype: Int64

>>> pd.array(["a", None, "c"])
<StringArray>
['a', nan, 'c']
['a', <NA>, 'c']
Length: 3, dtype: string

>>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
Expand Down
73 changes: 36 additions & 37 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
6 7 8
10 11 12

>>> df.droplevel('level2', axis=1)
>>> df.droplevel('level_2', axis=1)
level_1 c d
a b
1 2 3 4
Expand Down Expand Up @@ -1235,7 +1235,7 @@ def _set_axis_name(self, name, axis=0, inplace=False):
>>> df.index = pd.MultiIndex.from_product(
... [["mammal"], ['dog', 'cat', 'monkey']])
>>> df._set_axis_name(["type", "name"])
legs
num_legs
type name
mammal dog 4
cat 4
Expand Down Expand Up @@ -2205,14 +2205,16 @@ def to_json(

Examples
--------
>>> df = pd.DataFrame(
... [["a", "b"], ["c", "d"]],
... index=["row 1", "row 2"],
... columns=["col 1", "col 2"],
... )

>>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
... index=['row 1', 'row 2'],
... columns=['col 1', 'col 2'])
>>> df.to_json(orient='split')
'{"columns":["col 1","col 2"],
"index":["row 1","row 2"],
"data":[["a","b"],["c","d"]]}'
'{"columns":["col 1","col 2"],\
"index":["row 1","row 2"],\
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you use ... instead here?

"data":[["a","b"],["c","d"]]}'

Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
Note that index labels are not preserved with this encoding.
Expand All @@ -2238,15 +2240,12 @@ def to_json(
Encoding with Table Schema

>>> df.to_json(orient='table')
'{"schema": {"fields": [{"name": "index", "type": "string"},
{"name": "col 1", "type": "string"},
{"name": "col 2", "type": "string"}],
"primaryKey": "index",
"pandas_version": "0.20.0"},
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
'{"schema":{"fields":[{"name":"index","type":"string"},\
{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],\
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you use ... here? (else could do a JSON prettify, e.g. json.dump(..., indent=4))

"primaryKey":["index"],"pandas_version":"0.20.0"},\
"data":[{"index":"row 1","col 1":"a","col 2":"b"},\
{"index":"row 2","col 1":"c","col 2":"d"}]}'
"""

from pandas.io import json

if date_format is None and orient == "table":
Expand Down Expand Up @@ -4921,27 +4920,26 @@ def sample(

Notes
-----

Use ``.pipe`` when chaining together functions that expect
Series, DataFrames or GroupBy objects. Instead of writing

>>> f(g(h(df), arg1=a), arg2=b, arg3=c)
>>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP

You can write

>>> (df.pipe(h)
... .pipe(g, arg1=a)
... .pipe(f, arg2=b, arg3=c)
... )
... .pipe(func, arg2=b, arg3=c)
... ) # doctest: +SKIP

If you have a function that takes the data as (say) the second
argument, pass a tuple indicating which keyword expects the
data. For example, suppose ``f`` takes its data as ``arg2``:

>>> (df.pipe(h)
... .pipe(g, arg1=a)
... .pipe((f, 'arg2'), arg1=a, arg3=c)
... )
... .pipe((func, 'arg2'), arg1=a, arg3=c)
... ) # doctest: +SKIP
"""

@Appender(_shared_docs["pipe"] % _shared_doc_kwargs)
Expand Down Expand Up @@ -5289,7 +5287,7 @@ def values(self) -> np.ndarray:
dtype: object
>>> df.values
array([[ 3, 94, 31],
[ 29, 170, 115]], dtype=int64)
[ 29, 170, 115]])

A DataFrame with mixed type columns(e.g., str/object, int64, float32)
results in an ndarray of the broadest type that accommodates these
Expand Down Expand Up @@ -9546,12 +9544,13 @@ def describe(
... np.datetime64("2010-01-01")
... ])
>>> s.describe()
count 3
unique 2
top 2010-01-01 00:00:00
freq 2
first 2000-01-01 00:00:00
last 2010-01-01 00:00:00
count 3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these on the doctest list that we check?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since 4d66fa8 they are.

mean 2006-09-01 08:00:00
min 2000-01-01 00:00:00
25% 2004-12-31 12:00:00
50% 2010-01-01 00:00:00
75% 2010-01-01 00:00:00
max 2010-01-01 00:00:00
dtype: object

Describing a ``DataFrame``. By default only numeric fields
Expand All @@ -9574,11 +9573,11 @@ def describe(

Describing all columns of a ``DataFrame`` regardless of data type.

>>> df.describe(include='all')
categorical numeric object
>>> df.describe(include='all') # doctest: +SKIP
categorical numeric object
count 3 3.0 3
unique 3 NaN 3
top f NaN c
top f NaN a
freq 1 NaN 1
mean NaN 2.0 NaN
std NaN 1.0 NaN
Expand Down Expand Up @@ -9617,11 +9616,11 @@ def describe(

Including only string columns in a ``DataFrame`` description.

>>> df.describe(include=[np.object])
>>> df.describe(include=[np.object]) # doctest: +SKIP
object
count 3
unique 3
top c
top a
freq 1

Including only categorical columns from a ``DataFrame`` description.
Expand All @@ -9635,16 +9634,16 @@ def describe(

Excluding numeric columns from a ``DataFrame`` description.

>>> df.describe(exclude=[np.number])
>>> df.describe(exclude=[np.number]) # doctest: +SKIP
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How come you are adding doctest skips opposed to our current pytest -k approach? Think we should be consistent.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with describe is that the output can be random. If you know how to skip a specific line in the output, that would be great!

categorical object
count 3 3
unique 3 3
top f c
top f a
freq 1 1

Excluding object columns from a ``DataFrame`` description.

>>> df.describe(exclude=[np.object])
>>> df.describe(exclude=[np.object]) # doctest: +SKIP
categorical numeric
count 3 3.0
unique 3 NaN
Expand Down