From e91444edd1ce8afbc8617ee1cff1d8dde5dfd00e Mon Sep 17 00:00:00 2001
From: pdpark <adad@sbcglobal.net>
Date: Fri, 12 Jan 2018 15:01:03 -0800
Subject: [PATCH 1/5] DOC: Adds example of alternative to storing lists in a
 Dataframe

Restores: #17027
---
 doc/source/gotchas.rst | 89 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index bc490877e190d..0e99cebc30abd 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -332,3 +332,92 @@ using something similar to the following:
 See `the NumPy documentation on byte order
 <https://docs.scipy.org/doc/numpy/user/basics.byteswapping.html>`__ for more
 details.
+
+
+Alternative to storing lists in DataFrame Cells
+------------------------------------------------------
+Storing nested lists/arrays inside a pandas object should be avoided for performance and memory use reasons. Instead they should be "exploded" into a flat ``DataFrame`` structure.
+
+Example of exploding nested lists into a DataFrame:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
+                      'opponent': ['76ers', 'blazers', 'bobcats'], 
+                      'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3},
+                     columns=['name','opponent','attribute x','nearest_neighbors'])
+   df
+
+   nearest_neighbors = [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']]*3
+   nearest_neighbors
+
+   #. Create an index with the "parent" columns to be included in the final Dataframe
+   df2 = pd.concat([df[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
+   df2
+
+   #. Transform the column with lists into series, which become columns in a new Dataframe.
+   #    Note that only the index from the original df is retained - 
+   #    any other columns in the original df are not part of the new df
+   df3 = df2.set_index(['name', 'opponent'])
+   df3
+
+   #. Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
+   #    Note that at this point we have a Series, not a Dataframe
+   ser = df3.stack()
+   ser
+
+   #. Drop the extraneous index level created by the stack
+   ser.reset_index(level=2, drop=True, inplace=True)
+   ser
+
+   #. Create a Dataframe from the Series
+   df4 = ser.to_frame('nearest_neighbors')
+   df4
+
+   # All steps in one stack
+   df4 = (df2.set_index(['name', 'opponent'])
+           .stack()
+           .reset_index(level=2, drop=True)
+           .to_frame('nearest_neighbors'))
+   df4
+
+Example of exploding a list embedded in a dataframe:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
+                      'opponent': ['76ers', 'blazers', 'bobcats'], 
+                      'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3},
+                     columns=['name','opponent','attribute x','nearest_neighbors'])
+   df
+
+   #. Create an index with the "parent" columns to be included in the final Dataframe
+   df2 = df.set_index(['name', 'opponent'])
+   df2
+
+   #. Transform the column with lists into series, which become columns in a new Dataframe.
+   #    Note that only the index from the original df is retained - 
+   #    any other columns in the original df are not part of the new df
+   df3 = df2.nearest_neighbors.apply(pd.Series)
+   df3
+
+   #. Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
+   #    Note that at this point we have a Series, not a Dataframe
+   ser = df3.stack()
+   ser
+
+   #. Drop the extraneous index level created by the stack
+   ser.reset_index(level=2, drop=True, inplace=True)
+   ser
+
+   #. Create a Dataframe from the Series
+   df4 = ser.to_frame('nearest_neighbors')
+   df4
+
+   # All steps in one stack
+   df4 = (df.set_index(['name', 'opponent'])
+           .nearest_neighbors.apply(pd.Series)
+           .stack()
+           .reset_index(level=2, drop=True)
+           .to_frame('nearest_neighbors'))
+   df4

From 11ff8a7d898130de49d8a366501ac90b1408b727 Mon Sep 17 00:00:00 2001
From: pdpark <adad@sbcglobal.net>
Date: Fri, 12 Jan 2018 15:43:10 -0800
Subject: [PATCH 2/5] Doc: Fixes issues with code examples.

---
 doc/source/gotchas.rst | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index 0e99cebc30abd..b2854670739f4 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -343,9 +343,8 @@ Example of exploding nested lists into a DataFrame:
 .. ipython:: python
 
    df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
-                      'opponent': ['76ers', 'blazers', 'bobcats'], 
-                      'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3},
-                     columns=['name','opponent','attribute x','nearest_neighbors'])
+                      'opponent': ['76ers', 'blazers', 'bobcats']},
+                     columns=['name','opponent'])
    df
 
    nearest_neighbors = [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']]*3
@@ -388,7 +387,7 @@ Example of exploding a list embedded in a dataframe:
    df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
                       'opponent': ['76ers', 'blazers', 'bobcats'], 
                       'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3},
-                     columns=['name','opponent','attribute x','nearest_neighbors'])
+                     columns=['name','opponent','nearest_neighbors'])
    df
 
    #. Create an index with the "parent" columns to be included in the final Dataframe

From 6d379b49b209b7e7284fe97ee734f05bc964b6a8 Mon Sep 17 00:00:00 2001
From: Gautam <gautam@mishragautam.com>
Date: Mon, 8 Oct 2018 14:15:16 +0530
Subject: [PATCH 3/5] DOC: Add example of alternative to storing lists in a
 Dataframe fix PR19215

---
 doc/source/gotchas.rst | 97 ++++++++++++++++++++++++++----------------
 1 file changed, 61 insertions(+), 36 deletions(-)

diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index fa16a357f82d0..625ab8b12a638 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -339,34 +339,43 @@ details.
 
 
 Alternative to storing lists in DataFrame Cells
-------------------------------------------------------
+-----------------------------------------------
 Storing nested lists/arrays inside a pandas object should be avoided for performance and memory use reasons. Instead they should be "exploded" into a flat ``DataFrame`` structure.
 
 Example of exploding nested lists into a DataFrame:
 
 .. ipython:: python
 
-   df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
+   dframe = pd.DataFrame({'name': ['A.J. Price'] * 3, 
                       'opponent': ['76ers', 'blazers', 'bobcats']},
                      columns=['name','opponent'])
-   df
+   dframe
 
    nearest_neighbors = [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']]*3
    nearest_neighbors
 
-   #. Create an index with the "parent" columns to be included in the final Dataframe
-   df2 = pd.concat([df[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
-   df2
+Combine the "parent" columns with the nested lists; each list position becomes a column in the new ``DataFrame``.
+
+.. ipython:: python
+
+   df = pd.concat([dframe[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
+   df
+
+Transform the column with lists into series, which become columns in a new Dataframe.
+   Note that only the index from the original df is retained - 
+   Any other columns in the original df are not part of the new df
+
+.. ipython:: python
+
+   df = df.set_index(['name', 'opponent'])
+   df
+
+Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
+   Note that at this point we have a Series, not a Dataframe
 
-   #. Transform the column with lists into series, which become columns in a new Dataframe.
-   #    Note that only the index from the original df is retained - 
-   #    any other columns in the original df are not part of the new df
-   df3 = df2.set_index(['name', 'opponent'])
-   df3
+.. ipython:: python
 
-   #. Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
-   #    Note that at this point we have a Series, not a Dataframe
-   ser = df3.stack()
+   ser = df.stack()
    ser
 
    #. Drop the extraneous index level created by the stack
@@ -374,39 +383,52 @@ Example of exploding nested lists into a DataFrame:
    ser
 
    #. Create a Dataframe from the Series
-   df4 = ser.to_frame('nearest_neighbors')
-   df4
+   df = ser.to_frame('nearest_neighbors')
+   df
+
+All steps in one stack
+
+.. ipython:: python
 
-   # All steps in one stack
-   df4 = (df2.set_index(['name', 'opponent'])
+   df = (dframe.concat([df[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
+	   .set_index(['name', 'opponent'])
            .stack()
            .reset_index(level=2, drop=True)
            .to_frame('nearest_neighbors'))
-   df4
+   df
 
 Example of exploding a list embedded in a dataframe:
 
 .. ipython:: python
 
-   df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
+   dframe = pd.DataFrame({'name': ['A.J. Price'] * 3, 
                       'opponent': ['76ers', 'blazers', 'bobcats'], 
                       'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3},
                      columns=['name','opponent','nearest_neighbors'])
+   dframe
+
+Create an index with the "parent" columns to be included in the final Dataframe
+
+.. ipython:: python
+
+   df = dframe.set_index(['name', 'opponent'])
+   df
+
+Transform the column with lists into series, which become columns in a new Dataframe.
+  Note that only the index from the original df is retained - 
+
+.. ipython:: python
+
+  any other columns in the original df are not part of the new df
+   df = df.nearest_neighbors.apply(pd.Series)
    df
 
-   #. Create an index with the "parent" columns to be included in the final Dataframe
-   df2 = df.set_index(['name', 'opponent'])
-   df2
+Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
+  Note that at this point we have a Series, not a Dataframe
 
-   #. Transform the column with lists into series, which become columns in a new Dataframe.
-   #    Note that only the index from the original df is retained - 
-   #    any other columns in the original df are not part of the new df
-   df3 = df2.nearest_neighbors.apply(pd.Series)
-   df3
+.. ipython:: python
 
-   #. Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
-   #    Note that at this point we have a Series, not a Dataframe
-   ser = df3.stack()
+   ser = df.stack()
    ser
 
    #. Drop the extraneous index level created by the stack
@@ -414,13 +436,16 @@ Example of exploding a list embedded in a dataframe:
    ser
 
    #. Create a Dataframe from the Series
-   df4 = ser.to_frame('nearest_neighbors')
-   df4
+   df = ser.to_frame('nearest_neighbors')
+   df
 
-   # All steps in one stack
-   df4 = (df.set_index(['name', 'opponent'])
+All steps in one stack
+
+.. ipython:: python
+
+   df = (dframe.set_index(['name', 'opponent'])
            .nearest_neighbors.apply(pd.Series)
            .stack()
            .reset_index(level=2, drop=True)
            .to_frame('nearest_neighbors'))
-   df4
+   df

From a5a9ec2a6df8d4a1631c1bdd51791092ddf21019 Mon Sep 17 00:00:00 2001
From: Gautam <gautam@mishragautam.com>
Date: Mon, 8 Oct 2018 14:31:55 +0530
Subject: [PATCH 4/5] DOC: Adds example of alternative to storing lists in a
 Dataframe - made more fixes

---
 doc/source/gotchas.rst | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index 625ab8b12a638..3670e949a4592 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -361,17 +361,16 @@ Create an index with the "parent" columns to be included in the final Dataframe
    df = pd.concat([dframe[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
    df
 
-Transform the column with lists into series, which become columns in a new Dataframe.
-   Note that only the index from the original df is retained - 
-   Any other columns in the original df are not part of the new df
+Transform the column with lists into series, which become columns in a new Dataframe. 
+Note that only the index from the original ``df`` is retained; any other columns in the original ``df`` are not part of the new ``df``.
 
 .. ipython:: python
 
    df = df.set_index(['name', 'opponent'])
    df
 
-Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
-   Note that at this point we have a Series, not a Dataframe
+Stack the new columns as rows; this creates a new index level we'll want to drop in the next step. 
+Note that at this point we have a ``Series``, not a ``DataFrame``.
 
 .. ipython:: python
 
@@ -414,17 +413,16 @@ Create an index with the "parent" columns to be included in the final Dataframe
    df = dframe.set_index(['name', 'opponent'])
    df
 
-Transform the column with lists into series, which become columns in a new Dataframe.
-  Note that only the index from the original df is retained - 
+Transform the column with lists into series, which become columns in a new Dataframe. 
+Note that only the index from the original ``df`` is retained; any other columns in the original ``df`` are not part of the new ``df``.
 
 .. ipython:: python
 
-  any other columns in the original df are not part of the new df
    df = df.nearest_neighbors.apply(pd.Series)
    df
 
-Stack the new columns as rows; this creates a new index level we'll want to drop in the next step.
-  Note that at this point we have a Series, not a Dataframe
+Stack the new columns as rows; this creates a new index level we'll want to drop in the next step. 
+Note that at this point we have a ``Series``, not a ``DataFrame``.
 
 .. ipython:: python
 

From 4952597f4a94a4e21a5ec72fd8d5fef2dfa81a5f Mon Sep 17 00:00:00 2001
From: Gautam <gautam@mishragautam.com>
Date: Mon, 8 Oct 2018 15:55:19 +0530
Subject: [PATCH 5/5] Doc: Adds example of exploding lists into columns instead
 of storing in dataframe cells

---
 doc/source/gotchas.rst | 36 ++++++++----------------------------
 1 file changed, 8 insertions(+), 28 deletions(-)

diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index 3670e949a4592..1da7e38c7bab7 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -346,10 +346,10 @@ Example of exploding nested lists into a DataFrame:
 
 .. ipython:: python
 
-   dframe = pd.DataFrame({'name': ['A.J. Price'] * 3, 
+   df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
                       'opponent': ['76ers', 'blazers', 'bobcats']},
-                     columns=['name','opponent'])
-   dframe
+                       columns=['name','opponent'])
+   df
 
    nearest_neighbors = [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']]*3
    nearest_neighbors
@@ -358,7 +358,7 @@ Create an index with the "parent" columns to be included in the final Dataframe
 
 .. ipython:: python
 
-   df = pd.concat([dframe[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
+   df = pd.concat([df[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
    df
 
 Transform the column with lists into series, which become columns in a new Dataframe. 
@@ -385,32 +385,22 @@ Note that at this point we have a Series, not a Dataframe
    df = ser.to_frame('nearest_neighbors')
    df
 
-All steps in one stack
-
-.. ipython:: python
-
-   df = (dframe.concat([df[['name','opponent']], pd.DataFrame(nearest_neighbors)], axis=1)
-	   .set_index(['name', 'opponent'])
-           .stack()
-           .reset_index(level=2, drop=True)
-           .to_frame('nearest_neighbors'))
-   df
 
 Example of exploding a list embedded in a dataframe:
 
 .. ipython:: python
 
-   dframe = pd.DataFrame({'name': ['A.J. Price'] * 3, 
+   df = pd.DataFrame({'name': ['A.J. Price'] * 3, 
                       'opponent': ['76ers', 'blazers', 'bobcats'], 
                       'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3},
-                     columns=['name','opponent','nearest_neighbors'])
-   dframe
+                       columns=['name','opponent','nearest_neighbors'])
+   df
 
 Create an index with the "parent" columns to be included in the final Dataframe
 
 .. ipython:: python
 
-   df = dframe.set_index(['name', 'opponent'])
+   df = df.set_index(['name', 'opponent'])
    df
 
 Transform the column with lists into series, which become columns in a new Dataframe. 
@@ -437,13 +427,3 @@ Note that at this point we have a Series, not a Dataframe
    df = ser.to_frame('nearest_neighbors')
    df
 
-All steps in one stack
-
-.. ipython:: python
-
-   df = (dframe.set_index(['name', 'opponent'])
-           .nearest_neighbors.apply(pd.Series)
-           .stack()
-           .reset_index(level=2, drop=True)
-           .to_frame('nearest_neighbors'))
-   df