From d6072be4bbca1d9a25386ef10ae5a8194ac11bd3 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 30 Jan 2020 22:30:35 +0200
Subject: [PATCH 01/11] DOC: Fix examples in documentation

---
 pandas/core/base.py         | 28 +++++++++-------
 pandas/core/construction.py |  5 +--
 pandas/core/generic.py      | 67 +++++++++++++++++--------------------
 3 files changed, 48 insertions(+), 52 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 05e3302abddbe..68b23465109ec 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1,6 +1,7 @@
 """
 Base and utility classes for pandas objects.
 """
+
 import builtins
 import textwrap
 from typing import Dict, FrozenSet, List, Optional, Union
@@ -1455,42 +1456,43 @@ def factorize(self, sort=False, na_sentinel=-1):
 
         Examples
         --------
-
-        >>> x = pd.Series([1, 2, 3])
-        >>> x
+        >>> ser = pd.Series([1, 2, 3])
+        >>> ser
         0    1
         1    2
         2    3
         dtype: int64
 
-        >>> x.searchsorted(4)
+        >>> ser.searchsorted(4)
         3
 
-        >>> x.searchsorted([0, 4])
+        >>> ser.searchsorted([0, 4])
         array([0, 3])
 
-        >>> x.searchsorted([1, 3], side='left')
+        >>> ser.searchsorted([1, 3], side="left")
         array([0, 2])
 
-        >>> x.searchsorted([1, 3], side='right')
+        >>> ser.searchsorted([1, 3], side="right")
         array([1, 3])
 
-        >>> x = pd.Categorical(['apple', 'bread', 'bread',
-                'cheese', 'milk'], ordered=True)
+        >>> ser = pd.Categorical(
+        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
+        ... )
+        >>> ser
         [apple, bread, bread, cheese, milk]
         Categories (4, object): [apple < bread < cheese < milk]
 
-        >>> x.searchsorted('bread')
+        >>> ser.searchsorted('bread')
         1
 
-        >>> x.searchsorted(['bread'], side='right')
+        >>> ser.searchsorted(['bread'], side='right')
         array([3])
 
         If the values are not monotonically sorted, wrong locations
         may be returned:
 
-        >>> x = pd.Series([2, 1, 3])
-        >>> x.searchsorted(1)
+        >>> ser = pd.Series([2, 1, 3])
+        >>> ser.searchsorted(1) # doctest: +SKIP
         0  # wrong result, correct would be 1
         """
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index f947a1fda49f1..e2d8fba8d4148 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -4,6 +4,7 @@
 
 These should not depend on core.internals.
 """
+
 from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast
 
 import numpy as np
@@ -200,12 +201,12 @@ def array(
 
     >>> pd.array([1, 2, np.nan])
     <IntegerArray>
-    [1, 2, NaN]
+    [1, 2, <NA>]
     Length: 3, dtype: Int64
 
    >>> pd.array(["a", None, "c"])
    <StringArray>
-    ['a', nan, 'c']
+    ['a', <NA>, 'c']
    Length: 3, dtype: string
 
    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a2e348bf98e33..0bdb24e891ad5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -664,7 +664,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
         6        7   8
         10      11  12
 
-        >>> df.droplevel('level2', axis=1)
+        >>> df.droplevel('level_2', axis=1)
         level_1   c   d
         a b
         1 2      3   4
@@ -1235,7 +1235,7 @@ def _set_axis_name(self, name, axis=0, inplace=False):
         >>> df.index = pd.MultiIndex.from_product(
         ...                 [["mammal"], ['dog', 'cat', 'monkey']])
         >>> df._set_axis_name(["type", "name"])
-                       legs
+                       num_legs
         type   name
         mammal dog        4
                cat        4
               monkey      2
@@ -2207,14 +2207,14 @@ def to_json(
 
         Examples
         --------
+        >>> df = pd.DataFrame(
+        ...     [["a", "b"], ["c", "d"]],
+        ...     index=["row 1", "row 2"],
+        ...     columns=["col 1", "col 2"],
+        ... )
 
-        >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
-        ...                   index=['row 1', 'row 2'],
-        ...                   columns=['col 1', 'col 2'])
 
         >>> df.to_json(orient='split')
-        '{"columns":["col 1","col 2"],
-        "index":["row 1","row 2"],
-        "data":[["a","b"],["c","d"]]}'
+        '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}'
 
         Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
         Note that index labels are not preserved with this encoding.
@@ -2240,15 +2240,8 @@ def to_json(
         Encoding with Table Schema
 
         >>> df.to_json(orient='table')
-        '{"schema": {"fields": [{"name": "index", "type": "string"},
-        {"name": "col 1", "type": "string"},
-        {"name": "col 2", "type": "string"}],
-        "primaryKey": "index",
-        "pandas_version": "0.20.0"},
-        "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
-        {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
+        '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}'
         """
-
         from pandas.io import json
@@ -4922,18 +4915,17 @@ def sample(
 
         Notes
         -----
-
         Use ``.pipe`` when chaining together functions that expect
         Series, DataFrames or GroupBy objects. Instead of writing
 
-        >>> f(g(h(df), arg1=a), arg2=b, arg3=c)
+        >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
 
         You can write
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
-        ...    .pipe(f, arg2=b, arg3=c)
-        ...    )
+        ...    .pipe(func, arg2=b, arg3=c)
+        ... ) # doctest: +SKIP
 
         If you have a function that takes the data as (say) the second
         argument, pass a tuple indicating which keyword expects the
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
-        ...    .pipe((f, 'arg2'), arg1=a, arg3=c)
-        ...    )
+        ...    .pipe((func, 'arg2'), arg1=a, arg3=c)
+        ... ) # doctest: +SKIP
         """
 
     @Appender(_shared_docs["pipe"] % _shared_doc_kwargs)
@@ -5290,7 +5282,7 @@ def values(self) -> np.ndarray:
         dtype: object
         >>> df.values
         array([[  3,  94,  31],
-               [ 29, 170, 115]], dtype=int64)
+               [ 29, 170, 115]])
 
         A DataFrame with mixed type columns(e.g., str/object, int64, float32)
         results in an ndarray of the broadest type that accommodates these
@@ -9547,12 +9539,13 @@ def describe(
         ...   np.datetime64("2010-01-01"),
         ...   np.datetime64("2010-01-01")
         ... ])
         >>> s.describe()
-        count                       3
-        unique                      2
-        top       2010-01-01 00:00:00
-        freq                        2
-        first     2000-01-01 00:00:00
-        last      2010-01-01 00:00:00
+        count                      3
+        mean     2006-09-01 08:00:00
+        min      2000-01-01 00:00:00
+        25%      2004-12-31 12:00:00
+        50%      2010-01-01 00:00:00
+        75%      2010-01-01 00:00:00
+        max      2010-01-01 00:00:00
         dtype: object
 
         Describing a ``DataFrame``. By default only numeric fields
@@ -9575,11 +9568,11 @@ def describe(
 
         Describing all columns of a ``DataFrame`` regardless of data type.
 
-        >>> df.describe(include='all')
+        >>> df.describe(include='all') # doctest: +SKIP
                categorical  numeric object
         count            3      3.0      3
         unique           3      NaN      3
-        top              f      NaN      c
+        top              f      NaN      a
         freq             1      NaN      1
         mean           NaN      2.0    NaN
         std            NaN      1.0    NaN
@@ -9618,11 +9611,11 @@ def describe(
 
         Including only string columns in a ``DataFrame`` description.
 
-        >>> df.describe(include=[np.object])
+        >>> df.describe(include=[np.object]) # doctest: +SKIP
                object
         count       3
         unique      3
-        top         c
+        top         a
         freq        1
 
         Including only categorical columns from a ``DataFrame`` description.
 
         >>> df.describe(include=['category'])
                categorical
         count            3
         unique           3
         top              f
         freq             1
 
         Excluding numeric columns from a ``DataFrame`` description.
-        >>> df.describe(exclude=[np.number])
+        >>> df.describe(exclude=[np.number]) # doctest: +SKIP
                categorical object
         count            3      3
         unique           3      3
-        top              f      c
+        top              f      a
         freq             1      1
 
         Excluding object columns from a ``DataFrame`` description.
 
-        >>> df.describe(exclude=[np.object])
+        >>> df.describe(exclude=[np.object]) # doctest: +SKIP
                categorical  numeric
         count            3      3.0
         unique           3      NaN

From 65d29cd84d50b1d7ef290b1b612016d427acf1eb Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 1 Feb 2020 12:28:57 +0200
Subject: [PATCH 02/11] Fix merge conflicts

---
 pandas/core/base.py | 94 ++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 49 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 68b23465109ec..9fe1af776dd2b 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -46,11 +46,15 @@
 
 
 class PandasObject(DirNamesMixin):
-    """baseclass for various pandas objects"""
+    """
+    Baseclass for various pandas objects.
+    """
 
     @property
     def _constructor(self):
-        """class constructor (for this class it's just `__class__`"""
+        """
+        Class constructor (for this class it's just `__class__`.
+        """
         return type(self)
 
     def __repr__(self) -> str:
@@ -78,16 +82,14 @@ def __sizeof__(self):
         """
         if hasattr(self, "memory_usage"):
             mem = self.memory_usage(deep=True)
-            if not is_scalar(mem):
-                mem = mem.sum()
-            return int(mem)
+            return int(mem if is_scalar(mem) else mem.sum())
 
-        # no memory_usage attribute, so fall back to
-        # object's 'sizeof'
+        # no memory_usage attribute, so fall back to object's 'sizeof'
         return super().__sizeof__()
 
     def _ensure_type(self: T, obj) -> T:
-        """Ensure that an object has same type as self.
+        """
+        Ensure that an object has same type as self.
 
         Used by type checkers.
         """
@@ -96,7 +98,8 @@
 
 
 class NoNewAttributesMixin:
-    """Mixin which prevents adding new attributes.
+    """
+    Mixin which prevents adding new attributes.
 
     Prevents additional attributes via xxx.attribute = "something" after a
     call to `self.__freeze()`. Mainly used to prevent the user from using
@@ -107,7 +110,9 @@
     """
 
     def _freeze(self):
-        """Prevents setting additional attributes"""
+        """
+        Prevents setting additional attributes.
+        """
         object.__setattr__(self, "__frozen", True)
 
     # prevent adding any attribute via s.xxx.new_attribute = ...
@@ -181,14 +186,12 @@ class SelectionMixin:
     @property
     def _selection_name(self):
         """
-        return a name for myself; this would ideally be called
-        the 'name' property, but we cannot conflict with the
-        Series.name property which can be set
+        Return a name for myself;
+
+        This would ideally be called the 'name' property,
+        but we cannot conflict with the Series.name property which can be set.
""" - if self._selection is None: - return None # 'result' - else: - return self._selection + return self._selection @property def _selection_list(self): @@ -200,7 +203,6 @@ def _selection_list(self): @cache_readonly def _selected_obj(self): - if self._selection is None or isinstance(self.obj, ABCSeries): return self.obj else: @@ -247,12 +249,11 @@ def _gotitem(self, key, ndim: int, subset=None): Parameters ---------- - key : string / list of selections + key : str / list of selections ndim : 1,2 requested ndim of result subset : object, default None subset to act on - """ raise AbstractMethodError(self) @@ -267,7 +268,6 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): - try to find a function (or attribute) on ourselves - try to find a numpy function - raise - """ assert isinstance(arg, str) @@ -586,7 +586,6 @@ def _shallow_copy(self, obj, **kwargs): """ return a new object with the replacement attributes """ - if isinstance(obj, self._constructor): obj = obj.obj for attr in self._attributes: @@ -670,8 +669,7 @@ def item(self): if len(self) == 1: return next(iter(self)) - else: - raise ValueError("can only convert an array of size 1 to a Python scalar") + raise ValueError("can only convert an array of size 1 to a Python scalar") @property def nbytes(self) -> int: @@ -736,7 +734,6 @@ def array(self) -> ExtensionArray: Examples -------- - For regular NumPy types like int, and float, a PandasArray is returned. @@ -852,12 +849,11 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): """ if is_extension_array_dtype(self.dtype): return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) - else: - if kwargs: - msg = "to_numpy() got an unexpected keyword argument '{}'".format( - list(kwargs.keys())[0] - ) - raise TypeError(msg) + elif kwargs: + bad_keys = list(kwargs.keys())[0] + raise TypeError( + f"to_numpy() got an unexpected keyword argument '{bad_keys}'" + ) result = np.asarray(self._values, dtype=dtype) # TODO(GH-24345): Avoid potential double copy @@ -1077,7 +1073,9 @@ def _reduce( filter_type=None, **kwds, ): - """ perform the reduction type operation if we can """ + """ + Perform the reduction type operation if we can. + """ func = getattr(self, name, None) if func is None: raise TypeError( @@ -1104,9 +1102,7 @@ def _map_values(self, mapper, na_action=None): The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. - """ - # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types @@ -1342,7 +1338,9 @@ def is_monotonic(self) -> bool: @property def is_monotonic_increasing(self) -> bool: - """alias for is_monotonic""" + """ + Alias for is_monotonic. + """ # mypy complains if we alias directly return self.is_monotonic @@ -1456,43 +1454,41 @@ def factorize(self, sort=False, na_sentinel=-1): Examples -------- - >>> ser = pd.Series([1, 2, 3]) - >>> ser + >>> x = pd.Series([1, 2, 3]) + >>> x 0 1 1 2 2 3 dtype: int64 - >>> ser.searchsorted(4) + >>> x.searchsorted(4) 3 - >>> ser.searchsorted([0, 4]) + >>> x.searchsorted([0, 4]) array([0, 3]) - >>> ser.searchsorted([1, 3], side="left") + >>> x.searchsorted([1, 3], side='left') array([0, 2]) - >>> ser.searchsorted([1, 3], side="right") + >>> x.searchsorted([1, 3], side='right') array([1, 3]) - >>> ser = pd.Categorical( - ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True - ... 
-        ... )
-        >>> ser
+        >>> x = pd.Categorical(['apple', 'bread', 'bread',
+                'cheese', 'milk'], ordered=True)
         [apple, bread, bread, cheese, milk]
         Categories (4, object): [apple < bread < cheese < milk]
 
-        >>> ser.searchsorted('bread')
+        >>> x.searchsorted('bread')
         1
 
-        >>> ser.searchsorted(['bread'], side='right')
+        >>> x.searchsorted(['bread'], side='right')
         array([3])
 
         If the values are not monotonically sorted, wrong locations
         may be returned:
 
-        >>> ser = pd.Series([2, 1, 3])
-        >>> ser.searchsorted(1) # doctest: +SKIP
+        >>> x = pd.Series([2, 1, 3])
+        >>> x.searchsorted(1)
         0  # wrong result, correct would be 1
         """

From 9e5af034e338fc86d200fdc472d901fe77983226 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 1 Feb 2020 12:32:50 +0200
Subject: [PATCH 03/11] Fixed pandas/core/base.py

---
 pandas/core/base.py | 32 ++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9fe1af776dd2b..97231b6144d09 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1454,41 +1454,49 @@ def factorize(self, sort=False, na_sentinel=-1):
 
         Examples
         --------
-        >>> x = pd.Series([1, 2, 3])
-        >>> x
+        >>> ser = pd.Series([1, 2, 3])
+        >>> ser
         0    1
         1    2
         2    3
         dtype: int64
 
-        >>> x.searchsorted(4)
+        >>> ser.searchsorted(4)
         3
 
-        >>> x.searchsorted([0, 4])
+        >>> ser.searchsorted([0, 4])
         array([0, 3])
 
-        >>> x.searchsorted([1, 3], side='left')
+        >>> ser.searchsorted([1, 3], side='left')
         array([0, 2])
 
-        >>> x.searchsorted([1, 3], side='right')
+        >>> ser.searchsorted([1, 3], side='right')
         array([1, 3])
 
-        >>> x = pd.Categorical(['apple', 'bread', 'bread',
-                'cheese', 'milk'], ordered=True)
+        >>> ser = pd.Categorical(
+        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
+        ... )
+        >>> ser
         [apple, bread, bread, cheese, milk]
         Categories (4, object): [apple < bread < cheese < milk]
 
-        >>> x.searchsorted('bread')
+        >>> ser.searchsorted('bread')
         1
 
-        >>> x.searchsorted(['bread'], side='right')
+        >>> ser.searchsorted(['bread'], side='right')
         array([3])
 
         If the values are not monotonically sorted, wrong locations
         may be returned:
 
-        >>> x = pd.Series([2, 1, 3])
-        >>> x.searchsorted(1)
+        >>> ser = pd.Series([2, 1, 3])
+        >>> ser
+        0    2
+        1    1
+        2    3
+        dtype: int64
+
+        >>> ser.searchsorted(1) # doctest: +SKIP
         0  # wrong result, correct would be 1
         """

From 316f8509b4986b1d888da26f5bb44663de90efa6 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 7 Feb 2020 12:50:28 +0200
Subject: [PATCH 04/11] Fixed lint issues

---
 pandas/core/generic.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4bc8f519f64cd..5e2d6dfb5ea1f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2212,7 +2212,9 @@ def to_json(
         ... )
 
         >>> df.to_json(orient='split')
-        '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}'
+        '{"columns":["col 1","col 2"],\
+"index":["row 1","row 2"],\
+"data":[["a","b"],["c","d"]]}'
 
         Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
         Note that index labels are not preserved with this encoding.
@@ -2238,7 +2240,11 @@ def to_json(
         Encoding with Table Schema
 
         >>> df.to_json(orient='table')
-        '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}'
+        '{"schema":{"fields":[{"name":"index","type":"string"},\
+{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],\
+"primaryKey":["index"],"pandas_version":"0.20.0"},\
+"data":[{"index":"row 1","col 1":"a","col 2":"b"},\
+{"index":"row 2","col 1":"c","col 2":"d"}]}'
         """
         from pandas.io import json

From 144caa850bca154202b35bb69ab57e59a5ca05ec Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 14 Feb 2020 12:22:13 +0200
Subject: [PATCH 05/11] Reverted change from fixing merge conflicts

---
 pandas/core/generic.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f96804b899a05..a23c6908c2c8b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2182,6 +2182,12 @@ def to_json(
 
         Examples
         --------
+        >>> df = pd.DataFrame(
+        ...     [["a", "b"], ["c", "d"]],
+        ...     index=["row 1", "row 2"],
+        ...     columns=["col 1", "col 2"],
+        ... )
+
         >>> df.to_json(orient='split')
         '{"columns":["col 1","col 2"],\
 "index":["row 1","row 2"],\
 "data":[["a","b"],["c","d"]]}'

From f3dd0435ac088309fdb1aae6ac822626d75eb3e1 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 14 Feb 2020 12:47:04 +0200
Subject: [PATCH 06/11] to_json examples are now pretty printed

---
 pandas/core/generic.py | 135 +++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 20 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a23c6908c2c8b..8ef35fd9340ee 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2182,46 +2182,141 @@ def to_json(
 
         Examples
         --------
+        >>> import json
         >>> df = pd.DataFrame(
         ...     [["a", "b"], ["c", "d"]],
         ...     index=["row 1", "row 2"],
         ...     columns=["col 1", "col 2"],
         ... )
 
-        >>> df.to_json(orient='split')
-        '{"columns":["col 1","col 2"],\
-"index":["row 1","row 2"],\
-"data":[["a","b"],["c","d"]]}'
+        >>> result = df.to_json(orient="split")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "columns": [
+                "col 1",
+                "col 2"
+            ],
+            "index": [
+                "row 1",
+                "row 2"
+            ],
+            "data": [
+                [
+                    "a",
+                    "b"
+                ],
+                [
+                    "c",
+                    "d"
+                ]
+            ]
+        }
 
         Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
         Note that index labels are not preserved with this encoding.
-        >>> df.to_json(orient='records')
-        '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'
+        >>> result = df.to_json(orient="records")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        [
+            {
+                "col 1": "a",
+                "col 2": "b"
+            },
+            {
+                "col 1": "c",
+                "col 2": "d"
+            }
+        ]
 
         Encoding/decoding a Dataframe using ``'index'`` formatted JSON:
 
-        >>> df.to_json(orient='index')
-        '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'
+        >>> result = df.to_json(orient="index")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "row 1": {
+                "col 1": "a",
+                "col 2": "b"
+            },
+            "row 2": {
+                "col 1": "c",
+                "col 2": "d"
+            }
+        }
 
         Encoding/decoding a Dataframe using ``'columns'`` formatted JSON:
 
-        >>> df.to_json(orient='columns')
-        '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}'
+        >>> result = df.to_json(orient="columns")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "col 1": {
+                "row 1": "a",
+                "row 2": "c"
+            },
+            "col 2": {
+                "row 1": "b",
+                "row 2": "d"
+            }
+        }
 
         Encoding/decoding a Dataframe using ``'values'`` formatted JSON:
 
-        >>> df.to_json(orient='values')
-        '[["a","b"],["c","d"]]'
-
-        Encoding with Table Schema
+        >>> result = df.to_json(orient="values")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        [
+            [
+                "a",
+                "b"
+            ],
+            [
+                "c",
+                "d"
+            ]
+        ]
 
-        >>> df.to_json(orient='table')
-        '{"schema":{"fields":[{"name":"index","type":"string"},\
-{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],\
-"primaryKey":["index"],"pandas_version":"0.20.0"},\
-"data":[{"index":"row 1","col 1":"a","col 2":"b"},\
-{"index":"row 2","col 1":"c","col 2":"d"}]}'
+        Encoding with Table Schema:
+
+        >>> result = df.to_json(orient="table")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "schema": {
+                "fields": [
+                    {
+                        "name": "index",
+                        "type": "string"
+                    },
+                    {
+                        "name": "col 1",
+                        "type": "string"
+                    },
+                    {
+                        "name": "col 2",
+                        "type": "string"
+                    }
+                ],
+                "primaryKey": [
+                    "index"
+                ],
+                "pandas_version": "0.20.0"
+            },
+            "data": [
+                {
+                    "index": "row 1",
+                    "col 1": "a",
+                    "col 2": "b"
+                },
+                {
+                    "index": "row 2",
+                    "col 1": "c",
+                    "col 2": "d"
+                }
+            ]
+        }
         """
         from pandas.io import json

From 4d66fa83c8195c0add33bd1540675946d1a0bdad Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 14 Feb 2020 12:52:03 +0200
Subject: [PATCH 07/11] Added checks to the CI

---
 ci/code_checks.sh | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 7eb80077c4fab..ed20bc8157cd4 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -268,11 +268,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
         -k"-nonzero -reindex -searchsorted -to_dict"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Doctests generic.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Doctests groupby.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -313,6 +308,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/arrays/boolean.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests base.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/base.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Doctests construction.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/construction.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Doctests generic.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/generic.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
 fi
 
 ### DOCSTRINGS ###

From 4c304a6e63916ab618ecadd161f11c0cd8decd27 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 15 Feb 2020 13:30:59 +0200
Subject: [PATCH 08/11] Skipping "clipboard" examples as there's no clipboard
 in the CI

---
 pandas/core/generic.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ccfffd0c1cf31..403f40d4fa327 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2743,7 +2743,8 @@ def to_clipboard(
         Copy the contents of a DataFrame to the clipboard.
 
         >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
-        >>> df.to_clipboard(sep=',')
+
+        >>> df.to_clipboard(sep=',') # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # ,A,B,C
         ... # 0,1,2,3
@@ -2752,7 +2753,7 @@ def to_clipboard(
         We can omit the index by passing the keyword `index` and setting
         it to false.
 
-        >>> df.to_clipboard(sep=',', index=False)
+        >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # A,B,C
         ... # 1,2,3

From d489b45bdae1371a146aac7aaddd29488335e0b0 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 27 Feb 2020 19:24:46 +0200
Subject: [PATCH 09/11] Reverted the wrong merge error

---
 ci/code_checks.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6ad971a5eb1d4..8cf9f164d140a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -267,12 +267,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
         -k"-nonzero -reindex -searchsorted -to_dict"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-
-    MSG='Doctests generic.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Doctests groupby.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
     RET=$(($RET + $?)) ; echo $MSG "DONE"

From 44ca5d5d68ade678d253e9aebd37054f94fdb079 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 27 Feb 2020 19:31:57 +0200
Subject: [PATCH 10/11] Added extra space to the doctest skip comment

REF: https://github.com/pandas-dev/pandas/pull/31472#issuecomment-591624321

---
 pandas/core/base.py    |  2 +-
 pandas/core/generic.py | 30 +++++++++++++++---------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 83b85249e72bd..f2b678500a985 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1522,7 +1522,7 @@ def factorize(self, sort=False, na_sentinel=-1):
         2    3
         dtype: int64
 
-        >>> ser.searchsorted(1) # doctest: +SKIP
+        >>> ser.searchsorted(1)  # doctest: +SKIP
         0  # wrong result, correct would be 1
         """
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5191aa04498bd..5733e4dc99c68 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2190,7 +2190,7 @@ def to_json(
 
         >>> result = df.to_json(orient="split")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "columns": [
                 "col 1",
@@ -2217,7 +2217,7 @@ def to_json(
 
         >>> result = df.to_json(orient="records")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         [
             {
                 "col 1": "a",
@@ -2233,7 +2233,7 @@ def to_json(
 
         >>> result = df.to_json(orient="index")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "row 1": {
                 "col 1": "a",
@@ -2249,7 +2249,7 @@ def to_json(
 
         >>> result = df.to_json(orient="columns")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "col 1": {
                 "row 1": "a",
@@ -2265,7 +2265,7 @@ def to_json(
 
         >>> result = df.to_json(orient="values")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         [
             [
                 "a",
@@ -2281,7 +2281,7 @@ def to_json(
 
         >>> result = df.to_json(orient="table")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "schema": {
                 "fields": [
@@ -2743,7 +2743,7 @@ def to_clipboard(
 
         >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
 
-        >>> df.to_clipboard(sep=',') # doctest: +SKIP
+        >>> df.to_clipboard(sep=',')  # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # ,A,B,C
         ... # 0,1,2,3
@@ -2752,7 +2752,7 @@ def to_clipboard(
         We can omit the index by passing the keyword `index` and setting
         it to false.
 
-        >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
+        >>> df.to_clipboard(sep=',', index=False)  # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # A,B,C
         ... # 1,2,3
@@ -4987,14 +4987,14 @@ def sample(
         Use ``.pipe`` when chaining together functions that expect
         Series, DataFrames or GroupBy objects. Instead of writing
 
-        >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
+        >>> func(g(h(df), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP
 
         You can write
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
         ...    .pipe(func, arg2=b, arg3=c)
-        ... ) # doctest: +SKIP
+        ... )  # doctest: +SKIP
 
         If you have a function that takes the data as (say) the second
         argument, pass a tuple indicating which keyword expects the
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
         ...    .pipe((func, 'arg2'), arg1=a, arg3=c)
-        ... ) # doctest: +SKIP
+        ... )  # doctest: +SKIP
         """
@@ -9636,7 +9636,7 @@ def describe(
 
         Describing all columns of a ``DataFrame`` regardless of data type.
 
-        >>> df.describe(include='all') # doctest: +SKIP
+        >>> df.describe(include='all')  # doctest: +SKIP
                categorical  numeric object
         count            3      3.0      3
         unique           3      NaN      3
@@ -9679,7 +9679,7 @@ def describe(
 
         Including only string columns in a ``DataFrame`` description.
 
-        >>> df.describe(include=[np.object]) # doctest: +SKIP
+        >>> df.describe(include=[np.object])  # doctest: +SKIP
                object
         count       3
         unique      3
-        top         a
+        top         a
         freq        1
 
         Including only categorical columns from a ``DataFrame`` description.
@@ -9697,7 +9697,7 @@ def describe(
 
         Excluding numeric columns from a ``DataFrame`` description.
 
-        >>> df.describe(exclude=[np.number]) # doctest: +SKIP
+        >>> df.describe(exclude=[np.number])  # doctest: +SKIP
                categorical object
         count            3      3
         unique           3      3
@@ -9706,7 +9706,7 @@ def describe(
         Excluding object columns from a ``DataFrame`` description.
-        >>> df.describe(exclude=[np.object]) # doctest: +SKIP
+        >>> df.describe(exclude=[np.object])  # doctest: +SKIP
                categorical  numeric
         count            3      3.0
         unique           3      NaN

From 94ec83d0ab3416cd968568d93b81ff9014e48076 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 27 Feb 2020 19:35:45 +0200
Subject: [PATCH 11/11] Removed a single space

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5733e4dc99c68..b03ba2d325db5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2190,7 +2190,7 @@ def to_json(
 
         >>> result = df.to_json(orient="split")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4)   # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "columns": [
                 "col 1",
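A closing note on the mechanism this series converges on: the ``# doctest: +SKIP``
directive makes the doctest runner display an example without executing or checking
it, which is why it is attached above to outputs that are environment-dependent or
nondeterministic (clipboard access, the illustrative ``pipe`` pseudo-examples, and
``describe()`` results whose ``top`` value is an arbitrary tie-break). A minimal
sketch of the pattern follows; the module and function names are hypothetical, not
part of the patches:

    # skip_demo.py - hypothetical module illustrating "# doctest: +SKIP"
    # under the same pytest invocation the patches add to ci/code_checks.sh.
    import random


    def double(x):
        """
        Return twice the input.

        Examples
        --------
        A deterministic example; the runner executes it and compares output:

        >>> double(2)
        4

        A nondeterministic example; the runner renders it but never runs it:

        >>> double(random.random())  # doctest: +SKIP
        0.8414709848078965
        """
        return x * 2

Checked the same way the CI does it: ``pytest -q --doctest-modules skip_demo.py``
collects both examples, verifies the first, and skips the second. Note the two
spaces before the ``#``, the convention patch 10 settles on.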