From d06bf49ddd7d909da81003c5a3444e0cd74404bd Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Sat, 14 Sep 2019 13:28:45 -0500 Subject: [PATCH 1/9] explode multiple columns at same time --- pandas/core/frame.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1ed3a125f60c..7e4a75d500b1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6261,18 +6261,31 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": 3 3 1 3 4 1 """ - - if not (is_scalar(column) or isinstance(column, tuple)): - raise ValueError("column must be a scalar") if not self.columns.is_unique: raise ValueError("columns must be unique") - result = self[column].explode() - return ( - self.drop([column], axis=1) - .join(result) - .reindex(columns=self.columns, copy=False) - ) + if isinstance(columns, str): + columns = [columns] + + if not isinstance(columns, list): + raise TypeError("columns value not list or sting") + + if not all([c in self.columns for c in columns]): + raise ValueError("column name(s) not in index") + + tmp = pd.DataFrame() + lengths_equal = [] + for row in self[columns].iterrows(): + r = row[1] + lengths_equal.append(len(set([len(r[c]) for c in columns]))==1) + if all(lengths_equal): + for c in columns: + tmp[c] = self[c].explode() + else: + ValueError("lengths of lists in the same row not equal") + + results = self.drop(columns, axis=1).join(tmp) + return(results) def unstack(self, level=-1, fill_value=None): """ From f556e1f30a9e0804ec5acabf6198db9606bbb5d4 Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 10:55:43 -0500 Subject: [PATCH 2/9] fix bugs --- pandas/core/frame.py | 60 +++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7e4a75d500b1b..af85888cc7cd0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6205,16 +6205,16 @@ def stack(self, level=-1, dropna=True): else: return stack(self, level, dropna=dropna) - def explode(self, column: Union[str, Tuple]) -> "DataFrame": + def explode(self, columns: Union[str, List[str]]) -> "DataFrame": """ - Transform each element of a list-like to a row, replicating index values. + Transform each element of a list-like to a row, replicating the + index values. .. versionadded:: 0.25.0 Parameters ---------- column : str or tuple - Column to explode. Returns ------- @@ -6230,8 +6230,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": See Also -------- DataFrame.unstack : Pivot a level of the (necessarily hierarchical) - index labels. - DataFrame.melt : Unpivot a DataFrame from wide format to long format. + index labels + DataFrame.melt : Unpivot a DataFrame from wide format to long format Series.explode : Explode a DataFrame from list-like columns to long format. Notes @@ -6260,30 +6260,60 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": 2 NaN 1 3 3 1 3 4 1 - """ + + >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], + 'B': 1, + 'C': [[7,8,9],'bar',[],[8,7]]}) + >>> df + A B C + 0 [1, 2, 3] 1 [7, 8, 9] + 1 foo 1 bar + 2 [] 1 [] + 3 [3, 4] 1 [8, 7] + + >>> df.explode(['A','C']) + B A C + 0 1 1 7 + 0 1 2 8 + 0 1 3 9 + 1 1 foo bar + 2 1 NaN NaN + 3 1 3 8 + 3 1 4 7 + """ + + # Validate data if not self.columns.is_unique: raise ValueError("columns must be unique") - + if isinstance(columns, str): columns = [columns] - + if not isinstance(columns, list): raise TypeError("columns value not list or sting") - + if not all([c in self.columns for c in columns]): raise ValueError("column name(s) not in index") - - tmp = pd.DataFrame() + + tmp = self.iloc[0:0,0:0].copy() # creates empty temp df lengths_equal = [] + for row in self[columns].iterrows(): - r = row[1] - lengths_equal.append(len(set([len(r[c]) for c in columns]))==1) + # converts non-lists into 1 element lists + r=row[1].apply(lambda x: x if type(x) in (list,tuple) else [x]) + + # make sure all lists in the same record are the same length + row_is_ok = len(set([len(r[c]) for c in columns])) == 1 + lengths_equal.append(row_is_ok) + + # Explode all columns if lengths match if all(lengths_equal): for c in columns: tmp[c] = self[c].explode() else: - ValueError("lengths of lists in the same row not equal") - + raise ValueError("lengths of lists in the same row not equal") + + # join in exploded columns results = self.drop(columns, axis=1).join(tmp) return(results) From fa9dfe49b51a105a2b3f7eeb0987868b4d9888bd Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:00:54 -0500 Subject: [PATCH 3/9] syntax --- pandas/core/frame.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index af85888cc7cd0..e6e12a80d5b61 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6207,8 +6207,7 @@ def stack(self, level=-1, dropna=True): def explode(self, columns: Union[str, List[str]]) -> "DataFrame": """ - Transform each element of a list-like to a row, replicating the - index values. + Transform each element of a list-like to a row, replicating the index values. .. versionadded:: 0.25.0 @@ -6230,8 +6229,8 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": See Also -------- DataFrame.unstack : Pivot a level of the (necessarily hierarchical) - index labels - DataFrame.melt : Unpivot a DataFrame from wide format to long format + index labels. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. Series.explode : Explode a DataFrame from list-like columns to long format. Notes From 1e452a76f7f17a3fb37d973259876f6e100c214a Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:11:47 -0500 Subject: [PATCH 4/9] ENH: DataFrame.explode() allow for multiple columns Now if you pass a list of column names to .explode(), so long as all the lengths of lists are consistent across all the columns for each records, all the columns will be exploded. --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e6e12a80d5b61..f7b6df8299abf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6310,7 +6310,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": for c in columns: tmp[c] = self[c].explode() else: - raise ValueError("lengths of lists in the same row not equal") + raise ValueError("Exploded lists from `columns` do not have equivalent length within the same record") # join in exploded columns results = self.drop(columns, axis=1).join(tmp) From 6326745e18426bc6f07064b587e560834f9d6151 Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:25:35 -0500 Subject: [PATCH 5/9] ENH: DataFrame.explode() multiple columns Now explode() can also take in a list of columns and explode them all, given that for every record in the dataframe the elements of the exploding columns all have the same length --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f7b6df8299abf..77ce723ebba59 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6298,7 +6298,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": lengths_equal = [] for row in self[columns].iterrows(): - # converts non-lists into 1 element lists + # converts non-lists into 1 element lists so len() is valid r=row[1].apply(lambda x: x if type(x) in (list,tuple) else [x]) # make sure all lists in the same record are the same length From fa0df4223dd0a0fc8d716359dbd3451912912009 Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:31:43 -0500 Subject: [PATCH 6/9] ENH: explode multiple columns --- pandas/core/frame.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 77ce723ebba59..930fac1322bde 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6207,14 +6207,15 @@ def stack(self, level=-1, dropna=True): def explode(self, columns: Union[str, List[str]]) -> "DataFrame": """ - Transform each element of a list-like to a row, replicating the index values. + Transform each element of a list-like to a row, replicating index values. .. versionadded:: 0.25.0 Parameters ---------- - column : str or tuple - + columns : str or list + the column(s) to be exploded + Returns ------- DataFrame From 8215fa9fa1de129e997a96bea231e4f6c2269c3a Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:35:44 -0500 Subject: [PATCH 7/9] ENH: Explode multiple columns of DataFrame --- pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 930fac1322bde..e47d3dab79baa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6215,7 +6215,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": ---------- columns : str or list the column(s) to be exploded - + Returns ------- DataFrame @@ -6281,23 +6281,23 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": 3 1 3 8 3 1 4 7 """ - + # Validate data if not self.columns.is_unique: raise ValueError("columns must be unique") - + if isinstance(columns, str): columns = [columns] - + if not isinstance(columns, list): raise TypeError("columns value not list or sting") - + if not all([c in self.columns for c in columns]): raise ValueError("column name(s) not in index") - + tmp = self.iloc[0:0,0:0].copy() # creates empty temp df lengths_equal = [] - + for row in self[columns].iterrows(): # converts non-lists into 1 element lists so len() is valid r=row[1].apply(lambda x: x if type(x) in (list,tuple) else [x]) @@ -6305,14 +6305,14 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": # make sure all lists in the same record are the same length row_is_ok = len(set([len(r[c]) for c in columns])) == 1 lengths_equal.append(row_is_ok) - + # Explode all columns if lengths match if all(lengths_equal): for c in columns: tmp[c] = self[c].explode() else: raise ValueError("Exploded lists from `columns` do not have equivalent length within the same record") - + # join in exploded columns results = self.drop(columns, axis=1).join(tmp) return(results) From 34fef5306ad0735e501a8f0e9d6c32f086a921cf Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:43:32 -0500 Subject: [PATCH 8/9] multiple columns for explode method --- pandas/core/frame.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e47d3dab79baa..8f6a2100dc32b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6261,7 +6261,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": 3 3 1 3 4 1 - >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], + >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1, 'C': [[7,8,9],'bar',[],[8,7]]}) >>> df @@ -6270,8 +6270,8 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": 1 foo 1 bar 2 [] 1 [] 3 [3, 4] 1 [8, 7] - - >>> df.explode(['A','C']) + + >>> df.explode(['A','C']) B A C 0 1 1 7 0 1 2 8 @@ -6279,7 +6279,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": 1 1 foo bar 2 1 NaN NaN 3 1 3 8 - 3 1 4 7 + 3 1 4 7 """ # Validate data @@ -6295,23 +6295,24 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": if not all([c in self.columns for c in columns]): raise ValueError("column name(s) not in index") - tmp = self.iloc[0:0,0:0].copy() # creates empty temp df + tmp = self.iloc[0:0, 0:0].copy() # creates empty temp df lengths_equal = [] for row in self[columns].iterrows(): # converts non-lists into 1 element lists so len() is valid - r=row[1].apply(lambda x: x if type(x) in (list,tuple) else [x]) - + r = row[1].apply(lambda x: x if type(x) in (list, tuple) else [x]) + # make sure all lists in the same record are the same length row_is_ok = len(set([len(r[c]) for c in columns])) == 1 - lengths_equal.append(row_is_ok) + lengths_equal.append(row_is_ok) # Explode all columns if lengths match if all(lengths_equal): for c in columns: tmp[c] = self[c].explode() else: - raise ValueError("Exploded lists from `columns` do not have equivalent length within the same record") + e = "Elements from `columns` do not have equivalent length within in the same row" + raise ValueError(e) # join in exploded columns results = self.drop(columns, axis=1).join(tmp) From 2941421218cedd51a274da983fe05488c9f5b27e Mon Sep 17 00:00:00 2001 From: Kyle Stahl Date: Mon, 16 Sep 2019 11:48:48 -0500 Subject: [PATCH 9/9] ENH: explode method take multiple columns --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f6a2100dc32b..72ea208a8b801 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6269,7 +6269,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": 0 [1, 2, 3] 1 [7, 8, 9] 1 foo 1 bar 2 [] 1 [] - 3 [3, 4] 1 [8, 7] + 3 [3, 4] 1 [8, 7] >>> df.explode(['A','C']) B A C @@ -6295,7 +6295,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": if not all([c in self.columns for c in columns]): raise ValueError("column name(s) not in index") - tmp = self.iloc[0:0, 0:0].copy() # creates empty temp df + tmp = self.iloc[0:0, 0:0].copy() # creates empty temp df lengths_equal = [] for row in self[columns].iterrows(): @@ -6311,7 +6311,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame": for c in columns: tmp[c] = self[c].explode() else: - e = "Elements from `columns` do not have equivalent length within in the same row" + e = "Elements in `columns` do not have equal length in the same row" raise ValueError(e) # join in exploded columns