From d6072be4bbca1d9a25386ef10ae5a8194ac11bd3 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 30 Jan 2020 22:30:35 +0200
Subject: [PATCH 01/11] DOC: Fix examples in documentation

---
 pandas/core/base.py         | 28 +++++++++-------
 pandas/core/construction.py |  5 +--
 pandas/core/generic.py      | 67 +++++++++++++++++--------------------
 3 files changed, 48 insertions(+), 52 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 05e3302abddbe..68b23465109ec 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1,6 +1,7 @@
 """
 Base and utility classes for pandas objects.
 """
+
 import builtins
 import textwrap
 from typing import Dict, FrozenSet, List, Optional, Union
@@ -1455,42 +1456,43 @@ def factorize(self, sort=False, na_sentinel=-1):
 
         Examples
         --------
-
-        >>> x = pd.Series([1, 2, 3])
-        >>> x
+        >>> ser = pd.Series([1, 2, 3])
+        >>> ser
         0    1
         1    2
         2    3
         dtype: int64
 
-        >>> x.searchsorted(4)
+        >>> ser.searchsorted(4)
         3
 
-        >>> x.searchsorted([0, 4])
+        >>> ser.searchsorted([0, 4])
         array([0, 3])
 
-        >>> x.searchsorted([1, 3], side='left')
+        >>> ser.searchsorted([1, 3], side="left")
         array([0, 2])
 
-        >>> x.searchsorted([1, 3], side='right')
+        >>> ser.searchsorted([1, 3], side="right")
         array([1, 3])
 
-        >>> x = pd.Categorical(['apple', 'bread', 'bread',
-                'cheese', 'milk'], ordered=True)
+        >>> ser = pd.Categorical(
+        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
+        ... )
+        >>> ser
         [apple, bread, bread, cheese, milk]
         Categories (4, object): [apple < bread < cheese < milk]
 
-        >>> x.searchsorted('bread')
+        >>> ser.searchsorted('bread')
         1
 
-        >>> x.searchsorted(['bread'], side='right')
+        >>> ser.searchsorted(['bread'], side='right')
         array([3])
 
         If the values are not monotonically sorted, wrong locations
         may be returned:
 
-        >>> x = pd.Series([2, 1, 3])
-        >>> x.searchsorted(1)
+        >>> ser = pd.Series([2, 1, 3])
+        >>> ser.searchsorted(1) # doctest: +SKIP
         0  # wrong result, correct would be 1
         """
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index f947a1fda49f1..e2d8fba8d4148 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -4,6 +4,7 @@
 
 These should not depend on core.internals.
 """
+
 from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast
 
 import numpy as np
@@ -200,12 +201,12 @@ def array(
 
     >>> pd.array([1, 2, np.nan])
     <IntegerArray>
-    [1, 2, NaN]
+    [1, 2, <NA>]
     Length: 3, dtype: Int64
 
    >>> pd.array(["a", None, "c"])
    <StringArray>
-    ['a', nan, 'c']
+    ['a', <NA>, 'c']
    Length: 3, dtype: string
 
    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a2e348bf98e33..0bdb24e891ad5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -664,7 +664,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
         6        7   8
         10      11  12
 
-        >>> df.droplevel('level2', axis=1)
+        >>> df.droplevel('level_2', axis=1)
         level_1   c   d
         a b
         1 2      3   4
@@ -1235,7 +1235,7 @@ def _set_axis_name(self, name, axis=0, inplace=False):
         >>> df.index = pd.MultiIndex.from_product(
         ...                 [["mammal"], ['dog', 'cat', 'monkey']])
         >>> df._set_axis_name(["type", "name"])
-                       legs
+                       num_legs
         type   name
         mammal dog        4
                cat        4
               monkey      2
@@ -2207,14 +2207,14 @@ def to_json(
 
         Examples
         --------
+        >>> df = pd.DataFrame(
+        ...     [["a", "b"], ["c", "d"]],
+        ...     index=["row 1", "row 2"],
+        ...     columns=["col 1", "col 2"],
+        ... )
 
-        >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
-        ...                   index=['row 1', 'row 2'],
-        ...                   columns=['col 1', 'col 2'])
 
         >>> df.to_json(orient='split')
-        '{"columns":["col 1","col 2"],
-        "index":["row 1","row 2"],
-        "data":[["a","b"],["c","d"]]}'
+        '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}'
 
         Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
         Note that index labels are not preserved with this encoding.
@@ -2240,15 +2240,8 @@ def to_json(
         Encoding with Table Schema
 
         >>> df.to_json(orient='table')
-        '{"schema": {"fields": [{"name": "index", "type": "string"},
-        {"name": "col 1", "type": "string"},
-        {"name": "col 2", "type": "string"}],
-        "primaryKey": "index",
-        "pandas_version": "0.20.0"},
-        "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
-        {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
+        '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}'
         """
-
         from pandas.io import json
@@ -4922,18 +4915,17 @@ def sample(
 
         Notes
         -----
-
         Use ``.pipe`` when chaining together functions that expect
         Series, DataFrames or GroupBy objects. Instead of writing
 
-        >>> f(g(h(df), arg1=a), arg2=b, arg3=c)
+        >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
 
         You can write
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
-        ...    .pipe(f, arg2=b, arg3=c)
-        ...    )
+        ...    .pipe(func, arg2=b, arg3=c)
+        ... ) # doctest: +SKIP
 
         If you have a function that takes the data as (say) the second
         argument, pass a tuple indicating which keyword expects the
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
-        ...    .pipe((f, 'arg2'), arg1=a, arg3=c)
-        ...    )
+        ...    .pipe((func, 'arg2'), arg1=a, arg3=c)
+        ... ) # doctest: +SKIP
         """
 
     @Appender(_shared_docs["pipe"] % _shared_doc_kwargs)
@@ -5290,7 +5282,7 @@ def values(self) -> np.ndarray:
         dtype: object
         >>> df.values
         array([[  3,  94,  31],
-               [ 29, 170, 115]], dtype=int64)
+               [ 29, 170, 115]])
 
         A DataFrame with mixed type columns(e.g., str/object, int64, float32)
         results in an ndarray of the broadest type that accommodates these
@@ -9547,12 +9539,13 @@ def describe(
         ...   np.datetime64("2010-01-01"),
         ...   np.datetime64("2010-01-01")
         ... ])
         >>> s.describe()
-        count                       3
-        unique                      2
-        top       2010-01-01 00:00:00
-        freq                        2
-        first     2000-01-01 00:00:00
-        last      2010-01-01 00:00:00
+        count                      3
+        mean     2006-09-01 08:00:00
+        min      2000-01-01 00:00:00
+        25%      2004-12-31 12:00:00
+        50%      2010-01-01 00:00:00
+        75%      2010-01-01 00:00:00
+        max      2010-01-01 00:00:00
         dtype: object
 
         Describing a ``DataFrame``. By default only numeric fields
@@ -9575,11 +9568,11 @@ def describe(
 
         Describing all columns of a ``DataFrame`` regardless of data type.
 
-        >>> df.describe(include='all')
+        >>> df.describe(include='all') # doctest: +SKIP
                categorical  numeric object
         count            3      3.0      3
         unique           3      NaN      3
-        top              f      NaN      c
+        top              f      NaN      a
         freq             1      NaN      1
         mean           NaN      2.0    NaN
         std            NaN      1.0    NaN
@@ -9618,11 +9611,11 @@ def describe(
 
         Including only string columns in a ``DataFrame`` description.
 
-        >>> df.describe(include=[np.object])
+        >>> df.describe(include=[np.object]) # doctest: +SKIP
                object
         count       3
         unique      3
-        top         c
+        top         a
         freq        1
 
         Including only categorical columns from a ``DataFrame`` description.
 
         >>> df.describe(include=['category'])
                categorical
         count            3
         unique           3
         top              f
         freq             1
 
         Excluding numeric columns from a ``DataFrame`` description.
-        >>> df.describe(exclude=[np.number])
+        >>> df.describe(exclude=[np.number]) # doctest: +SKIP
                categorical object
         count            3      3
         unique           3      3
-        top              f      c
+        top              f      a
         freq             1      1
 
         Excluding object columns from a ``DataFrame`` description.
 
-        >>> df.describe(exclude=[np.object])
+        >>> df.describe(exclude=[np.object]) # doctest: +SKIP
                categorical  numeric
         count            3      3.0
         unique           3      NaN

From 65d29cd84d50b1d7ef290b1b612016d427acf1eb Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 1 Feb 2020 12:28:57 +0200
Subject: [PATCH 02/11] Fix merge conflicts

---
 pandas/core/base.py | 94 ++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 49 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 68b23465109ec..9fe1af776dd2b 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -46,11 +46,15 @@
 
 
 class PandasObject(DirNamesMixin):
-    """baseclass for various pandas objects"""
+    """
+    Baseclass for various pandas objects.
+    """
 
     @property
     def _constructor(self):
-        """class constructor (for this class it's just `__class__`"""
+        """
+        Class constructor (for this class it's just `__class__`.
+        """
         return type(self)
 
     def __repr__(self) -> str:
@@ -78,16 +82,14 @@ def __sizeof__(self):
         """
         if hasattr(self, "memory_usage"):
             mem = self.memory_usage(deep=True)
-            if not is_scalar(mem):
-                mem = mem.sum()
-            return int(mem)
+            return int(mem if is_scalar(mem) else mem.sum())
 
-        # no memory_usage attribute, so fall back to
-        # object's 'sizeof'
+        # no memory_usage attribute, so fall back to object's 'sizeof'
         return super().__sizeof__()
 
     def _ensure_type(self: T, obj) -> T:
-        """Ensure that an object has same type as self.
+        """
+        Ensure that an object has same type as self.
 
         Used by type checkers.
         """
@@ -96,7 +98,8 @@
 
 
 class NoNewAttributesMixin:
-    """Mixin which prevents adding new attributes.
+    """
+    Mixin which prevents adding new attributes.
 
     Prevents additional attributes via xxx.attribute = "something" after a
     call to `self.__freeze()`. Mainly used to prevent the user from using
@@ -107,7 +110,9 @@
     """
 
     def _freeze(self):
-        """Prevents setting additional attributes"""
+        """
+        Prevents setting additional attributes.
+        """
         object.__setattr__(self, "__frozen", True)
 
     # prevent adding any attribute via s.xxx.new_attribute = ...
@@ -181,14 +186,12 @@ class SelectionMixin:
     @property
     def _selection_name(self):
         """
-        return a name for myself; this would ideally be called
-        the 'name' property, but we cannot conflict with the
-        Series.name property which can be set
+        Return a name for myself;
+
+        This would ideally be called the 'name' property,
+        but we cannot conflict with the Series.name property which can be set.
""" - if self._selection is None: - return None # 'result' - else: - return self._selection + return self._selection @property def _selection_list(self): @@ -200,7 +203,6 @@ def _selection_list(self): @cache_readonly def _selected_obj(self): - if self._selection is None or isinstance(self.obj, ABCSeries): return self.obj else: @@ -247,12 +249,11 @@ def _gotitem(self, key, ndim: int, subset=None): Parameters ---------- - key : string / list of selections + key : str / list of selections ndim : 1,2 requested ndim of result subset : object, default None subset to act on - """ raise AbstractMethodError(self) @@ -267,7 +268,6 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): - try to find a function (or attribute) on ourselves - try to find a numpy function - raise - """ assert isinstance(arg, str) @@ -586,7 +586,6 @@ def _shallow_copy(self, obj, **kwargs): """ return a new object with the replacement attributes """ - if isinstance(obj, self._constructor): obj = obj.obj for attr in self._attributes: @@ -670,8 +669,7 @@ def item(self): if len(self) == 1: return next(iter(self)) - else: - raise ValueError("can only convert an array of size 1 to a Python scalar") + raise ValueError("can only convert an array of size 1 to a Python scalar") @property def nbytes(self) -> int: @@ -736,7 +734,6 @@ def array(self) -> ExtensionArray: Examples -------- - For regular NumPy types like int, and float, a PandasArray is returned. @@ -852,12 +849,11 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): """ if is_extension_array_dtype(self.dtype): return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) - else: - if kwargs: - msg = "to_numpy() got an unexpected keyword argument '{}'".format( - list(kwargs.keys())[0] - ) - raise TypeError(msg) + elif kwargs: + bad_keys = list(kwargs.keys())[0] + raise TypeError( + f"to_numpy() got an unexpected keyword argument '{bad_keys}'" + ) result = np.asarray(self._values, dtype=dtype) # TODO(GH-24345): Avoid potential double copy @@ -1077,7 +1073,9 @@ def _reduce( filter_type=None, **kwds, ): - """ perform the reduction type operation if we can """ + """ + Perform the reduction type operation if we can. + """ func = getattr(self, name, None) if func is None: raise TypeError( @@ -1104,9 +1102,7 @@ def _map_values(self, mapper, na_action=None): The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. - """ - # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types @@ -1342,7 +1338,9 @@ def is_monotonic(self) -> bool: @property def is_monotonic_increasing(self) -> bool: - """alias for is_monotonic""" + """ + Alias for is_monotonic. + """ # mypy complains if we alias directly return self.is_monotonic @@ -1456,43 +1454,41 @@ def factorize(self, sort=False, na_sentinel=-1): Examples -------- - >>> ser = pd.Series([1, 2, 3]) - >>> ser + >>> x = pd.Series([1, 2, 3]) + >>> x 0 1 1 2 2 3 dtype: int64 - >>> ser.searchsorted(4) + >>> x.searchsorted(4) 3 - >>> ser.searchsorted([0, 4]) + >>> x.searchsorted([0, 4]) array([0, 3]) - >>> ser.searchsorted([1, 3], side="left") + >>> x.searchsorted([1, 3], side='left') array([0, 2]) - >>> ser.searchsorted([1, 3], side="right") + >>> x.searchsorted([1, 3], side='right') array([1, 3]) - >>> ser = pd.Categorical( - ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True - ... 
-        ... )
-        >>> ser
+        >>> x = pd.Categorical(['apple', 'bread', 'bread',
+                'cheese', 'milk'], ordered=True)
         [apple, bread, bread, cheese, milk]
         Categories (4, object): [apple < bread < cheese < milk]
 
-        >>> ser.searchsorted('bread')
+        >>> x.searchsorted('bread')
         1
 
-        >>> ser.searchsorted(['bread'], side='right')
+        >>> x.searchsorted(['bread'], side='right')
         array([3])
 
         If the values are not monotonically sorted, wrong locations
         may be returned:
 
-        >>> ser = pd.Series([2, 1, 3])
-        >>> ser.searchsorted(1) # doctest: +SKIP
+        >>> x = pd.Series([2, 1, 3])
+        >>> x.searchsorted(1)
         0  # wrong result, correct would be 1
         """

From 9e5af034e338fc86d200fdc472d901fe77983226 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 1 Feb 2020 12:32:50 +0200
Subject: [PATCH 03/11] Fixed pandas/core/base.py

---
 pandas/core/base.py | 32 ++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9fe1af776dd2b..97231b6144d09 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1454,41 +1454,49 @@ def factorize(self, sort=False, na_sentinel=-1):
 
         Examples
         --------
-        >>> x = pd.Series([1, 2, 3])
-        >>> x
+        >>> ser = pd.Series([1, 2, 3])
+        >>> ser
         0    1
         1    2
         2    3
         dtype: int64
 
-        >>> x.searchsorted(4)
+        >>> ser.searchsorted(4)
         3
 
-        >>> x.searchsorted([0, 4])
+        >>> ser.searchsorted([0, 4])
         array([0, 3])
 
-        >>> x.searchsorted([1, 3], side='left')
+        >>> ser.searchsorted([1, 3], side='left')
         array([0, 2])
 
-        >>> x.searchsorted([1, 3], side='right')
+        >>> ser.searchsorted([1, 3], side='right')
         array([1, 3])
 
-        >>> x = pd.Categorical(['apple', 'bread', 'bread',
-                'cheese', 'milk'], ordered=True)
+        >>> ser = pd.Categorical(
+        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
+        ... )
+        >>> ser
         [apple, bread, bread, cheese, milk]
         Categories (4, object): [apple < bread < cheese < milk]
 
-        >>> x.searchsorted('bread')
+        >>> ser.searchsorted('bread')
         1
 
-        >>> x.searchsorted(['bread'], side='right')
+        >>> ser.searchsorted(['bread'], side='right')
         array([3])
 
         If the values are not monotonically sorted, wrong locations
         may be returned:
 
-        >>> x = pd.Series([2, 1, 3])
-        >>> x.searchsorted(1)
+        >>> ser = pd.Series([2, 1, 3])
+        >>> ser
+        0    2
+        1    1
+        2    3
+        dtype: int64
+
+        >>> ser.searchsorted(1) # doctest: +SKIP
         0  # wrong result, correct would be 1
         """

From 316f8509b4986b1d888da26f5bb44663de90efa6 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 7 Feb 2020 12:50:28 +0200
Subject: [PATCH 04/11] Fixed lint issues

---
 pandas/core/generic.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4bc8f519f64cd..5e2d6dfb5ea1f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2212,7 +2212,9 @@ def to_json(
         ... )
 
         >>> df.to_json(orient='split')
-        '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}'
+        '{"columns":["col 1","col 2"],\
+"index":["row 1","row 2"],\
+"data":[["a","b"],["c","d"]]}'
 
         Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
         Note that index labels are not preserved with this encoding.
@@ -2238,7 +2240,11 @@ def to_json(
         Encoding with Table Schema
 
         >>> df.to_json(orient='table')
-        '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}'
+        '{"schema":{"fields":[{"name":"index","type":"string"},\
+{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],\
+"primaryKey":["index"],"pandas_version":"0.20.0"},\
+"data":[{"index":"row 1","col 1":"a","col 2":"b"},\
+{"index":"row 2","col 1":"c","col 2":"d"}]}'
         """
         from pandas.io import json

From 144caa850bca154202b35bb69ab57e59a5ca05ec Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 14 Feb 2020 12:22:13 +0200
Subject: [PATCH 05/11] Reverted change from fixing merge conflicts

---
 pandas/core/generic.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f96804b899a05..a23c6908c2c8b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2182,6 +2182,12 @@ def to_json(
 
         Examples
         --------
+        >>> df = pd.DataFrame(
+        ...     [["a", "b"], ["c", "d"]],
+        ...     index=["row 1", "row 2"],
+        ...     columns=["col 1", "col 2"],
+        ... )
+
         >>> df.to_json(orient='split')
         '{"columns":["col 1","col 2"],\
 "index":["row 1","row 2"],\
 "data":[["a","b"],["c","d"]]}'

From f3dd0435ac088309fdb1aae6ac822626d75eb3e1 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 14 Feb 2020 12:47:04 +0200
Subject: [PATCH 06/11] to_json examples are now pretty printed

---
 pandas/core/generic.py | 135 +++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 20 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a23c6908c2c8b..8ef35fd9340ee 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2182,46 +2182,141 @@ def to_json(
 
         Examples
         --------
+        >>> import json
         >>> df = pd.DataFrame(
         ...     [["a", "b"], ["c", "d"]],
         ...     index=["row 1", "row 2"],
         ...     columns=["col 1", "col 2"],
         ... )
 
-        >>> df.to_json(orient='split')
-        '{"columns":["col 1","col 2"],\
-"index":["row 1","row 2"],\
-"data":[["a","b"],["c","d"]]}'
+        >>> result = df.to_json(orient="split")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "columns": [
+                "col 1",
+                "col 2"
+            ],
+            "index": [
+                "row 1",
+                "row 2"
+            ],
+            "data": [
+                [
+                    "a",
+                    "b"
+                ],
+                [
+                    "c",
+                    "d"
+                ]
+            ]
+        }
 
         Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
         Note that index labels are not preserved with this encoding.
-        >>> df.to_json(orient='records')
-        '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'
+        >>> result = df.to_json(orient="records")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        [
+            {
+                "col 1": "a",
+                "col 2": "b"
+            },
+            {
+                "col 1": "c",
+                "col 2": "d"
+            }
+        ]
 
         Encoding/decoding a Dataframe using ``'index'`` formatted JSON:
 
-        >>> df.to_json(orient='index')
-        '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'
+        >>> result = df.to_json(orient="index")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "row 1": {
+                "col 1": "a",
+                "col 2": "b"
+            },
+            "row 2": {
+                "col 1": "c",
+                "col 2": "d"
+            }
+        }
 
         Encoding/decoding a Dataframe using ``'columns'`` formatted JSON:
 
-        >>> df.to_json(orient='columns')
-        '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}'
+        >>> result = df.to_json(orient="columns")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "col 1": {
+                "row 1": "a",
+                "row 2": "c"
+            },
+            "col 2": {
+                "row 1": "b",
+                "row 2": "d"
+            }
+        }
 
         Encoding/decoding a Dataframe using ``'values'`` formatted JSON:
 
-        >>> df.to_json(orient='values')
-        '[["a","b"],["c","d"]]'
-
-        Encoding with Table Schema
+        >>> result = df.to_json(orient="values")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        [
+            [
+                "a",
+                "b"
+            ],
+            [
+                "c",
+                "d"
+            ]
+        ]
 
-        >>> df.to_json(orient='table')
-        '{"schema":{"fields":[{"name":"index","type":"string"},\
-{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],\
-"primaryKey":["index"],"pandas_version":"0.20.0"},\
-"data":[{"index":"row 1","col 1":"a","col 2":"b"},\
-{"index":"row 2","col 1":"c","col 2":"d"}]}'
+        Encoding with Table Schema:
+
+        >>> result = df.to_json(orient="table")
+        >>> parsed = json.loads(result)
+        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        {
+            "schema": {
+                "fields": [
+                    {
+                        "name": "index",
+                        "type": "string"
+                    },
+                    {
+                        "name": "col 1",
+                        "type": "string"
+                    },
+                    {
+                        "name": "col 2",
+                        "type": "string"
+                    }
+                ],
+                "primaryKey": [
+                    "index"
+                ],
+                "pandas_version": "0.20.0"
+            },
+            "data": [
+                {
+                    "index": "row 1",
+                    "col 1": "a",
+                    "col 2": "b"
+                },
+                {
+                    "index": "row 2",
+                    "col 1": "c",
+                    "col 2": "d"
+                }
+            ]
+        }
         """
         from pandas.io import json

From 4d66fa83c8195c0add33bd1540675946d1a0bdad Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Fri, 14 Feb 2020 12:52:03 +0200
Subject: [PATCH 07/11] Added checks to the CI

---
 ci/code_checks.sh | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 7eb80077c4fab..ed20bc8157cd4 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -268,11 +268,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
         -k"-nonzero -reindex -searchsorted -to_dict"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Doctests generic.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Doctests groupby.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -313,6 +308,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/arrays/boolean.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests base.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/base.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Doctests construction.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/construction.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Doctests generic.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/generic.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
 fi
 
 ### DOCSTRINGS ###

From 4c304a6e63916ab618ecadd161f11c0cd8decd27 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 15 Feb 2020 13:30:59 +0200
Subject: [PATCH 08/11] Skipping "clipboard" examples as there's no clipboard
 in the CI

---
 pandas/core/generic.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ccfffd0c1cf31..403f40d4fa327 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2743,7 +2743,8 @@ def to_clipboard(
         Copy the contents of a DataFrame to the clipboard.
 
         >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
-        >>> df.to_clipboard(sep=',')
+
+        >>> df.to_clipboard(sep=',') # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # ,A,B,C
         ... # 0,1,2,3
@@ -2752,7 +2753,7 @@ def to_clipboard(
         We can omit the index by passing the keyword `index` and setting
         it to false.
 
-        >>> df.to_clipboard(sep=',', index=False)
+        >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # A,B,C
         ... # 1,2,3

From d489b45bdae1371a146aac7aaddd29488335e0b0 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 27 Feb 2020 19:24:46 +0200
Subject: [PATCH 09/11] Reverted the wrong merge error

---
 ci/code_checks.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6ad971a5eb1d4..8cf9f164d140a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -267,12 +267,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
         -k"-nonzero -reindex -searchsorted -to_dict"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-
-    MSG='Doctests generic.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Doctests groupby.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
     RET=$(($RET + $?)) ; echo $MSG "DONE"

From 44ca5d5d68ade678d253e9aebd37054f94fdb079 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 27 Feb 2020 19:31:57 +0200
Subject: [PATCH 10/11] Added extra space to the doctest skip comment

REF: https://github.com/pandas-dev/pandas/pull/31472#issuecomment-591624321

---
 pandas/core/base.py    |  2 +-
 pandas/core/generic.py | 30 +++++++++++++++---------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 83b85249e72bd..f2b678500a985 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1522,7 +1522,7 @@ def factorize(self, sort=False, na_sentinel=-1):
         2    3
         dtype: int64
 
-        >>> ser.searchsorted(1) # doctest: +SKIP
+        >>> ser.searchsorted(1)  # doctest: +SKIP
         0  # wrong result, correct would be 1
         """
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5191aa04498bd..5733e4dc99c68 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2190,7 +2190,7 @@ def to_json(
 
         >>> result = df.to_json(orient="split")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "columns": [
                 "col 1",
@@ -2217,7 +2217,7 @@ def to_json(
 
         >>> result = df.to_json(orient="records")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         [
             {
                 "col 1": "a",
@@ -2233,7 +2233,7 @@ def to_json(
 
         >>> result = df.to_json(orient="index")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "row 1": {
                 "col 1": "a",
@@ -2249,7 +2249,7 @@ def to_json(
 
         >>> result = df.to_json(orient="columns")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "col 1": {
                 "row 1": "a",
@@ -2265,7 +2265,7 @@ def to_json(
 
         >>> result = df.to_json(orient="values")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         [
             [
                 "a",
@@ -2281,7 +2281,7 @@ def to_json(
 
         >>> result = df.to_json(orient="table")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4) # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "schema": {
                 "fields": [
@@ -2743,7 +2743,7 @@ def to_clipboard(
 
         >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
 
-        >>> df.to_clipboard(sep=',') # doctest: +SKIP
+        >>> df.to_clipboard(sep=',')  # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # ,A,B,C
         ... # 0,1,2,3
@@ -2752,7 +2752,7 @@ def to_clipboard(
         We can omit the index by passing the keyword `index` and setting
         it to false.
 
-        >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
+        >>> df.to_clipboard(sep=',', index=False)  # doctest: +SKIP
         ... # Wrote the following to the system clipboard:
         ... # A,B,C
         ... # 1,2,3
@@ -4987,14 +4987,14 @@ def sample(
         Use ``.pipe`` when chaining together functions that expect
         Series, DataFrames or GroupBy objects. Instead of writing
 
-        >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
+        >>> func(g(h(df), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP
 
         You can write
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
         ...    .pipe(func, arg2=b, arg3=c)
-        ... ) # doctest: +SKIP
+        ... )  # doctest: +SKIP
 
         If you have a function that takes the data as (say) the second
         argument, pass a tuple indicating which keyword expects the
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
         ...    .pipe((func, 'arg2'), arg1=a, arg3=c)
-        ... ) # doctest: +SKIP
+        ... )  # doctest: +SKIP
         """
@@ -9636,7 +9636,7 @@ def describe(
 
         Describing all columns of a ``DataFrame`` regardless of data type.
 
-        >>> df.describe(include='all') # doctest: +SKIP
+        >>> df.describe(include='all')  # doctest: +SKIP
                categorical  numeric object
         count            3      3.0      3
         unique           3      NaN      3
@@ -9679,7 +9679,7 @@ def describe(
 
         Including only string columns in a ``DataFrame`` description.
 
-        >>> df.describe(include=[np.object]) # doctest: +SKIP
+        >>> df.describe(include=[np.object])  # doctest: +SKIP
                object
         count       3
         unique      3
-        top         a
+        top         a
         freq        1
 
         Including only categorical columns from a ``DataFrame`` description.
@@ -9697,7 +9697,7 @@ def describe(
 
         Excluding numeric columns from a ``DataFrame`` description.
 
-        >>> df.describe(exclude=[np.number]) # doctest: +SKIP
+        >>> df.describe(exclude=[np.number])  # doctest: +SKIP
                categorical object
         count            3      3
         unique           3      3
@@ -9706,7 +9706,7 @@ def describe(
         Excluding object columns from a ``DataFrame`` description.
-        >>> df.describe(exclude=[np.object]) # doctest: +SKIP
+        >>> df.describe(exclude=[np.object])  # doctest: +SKIP
                categorical  numeric
         count            3      3.0
         unique           3      NaN

From 94ec83d0ab3416cd968568d93b81ff9014e48076 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Thu, 27 Feb 2020 19:35:45 +0200
Subject: [PATCH 11/11] Removed a single space

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5733e4dc99c68..b03ba2d325db5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2190,7 +2190,7 @@ def to_json(
 
         >>> result = df.to_json(orient="split")
         >>> parsed = json.loads(result)
-        >>> json.dumps(parsed, indent=4)   # doctest: +SKIP
+        >>> json.dumps(parsed, indent=4)  # doctest: +SKIP
         {
             "columns": [
                 "col 1",
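A closing note on the mechanism this series converges on: the ``# doctest: +SKIP``
directive makes the doctest runner display an example without executing or checking
it, which is why it is attached above to outputs that are environment-dependent or
nondeterministic (clipboard access, the illustrative ``pipe`` pseudo-examples, and
``describe()`` results whose ``top`` value is an arbitrary tie-break). A minimal
sketch of the pattern follows; the module and function names are hypothetical, not
part of the patches:

    # skip_demo.py - hypothetical module illustrating "# doctest: +SKIP"
    # under the same pytest invocation the patches add to ci/code_checks.sh.
    import random


    def double(x):
        """
        Return twice the input.

        Examples
        --------
        A deterministic example; the runner executes it and compares output:

        >>> double(2)
        4

        A nondeterministic example; the runner renders it but never runs it:

        >>> double(random.random())  # doctest: +SKIP
        0.8414709848078965
        """
        return x * 2

Checked the same way the CI does it: ``pytest -q --doctest-modules skip_demo.py``
collects both examples, verifies the first, and skips the second. Note the two
spaces before the ``#``, the convention patch 10 settles on.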