@@ -7910,16 +7910,23 @@ def stack(self, level: Level = -1, dropna: bool = True):
7910
7910
7911
7911
return result .__finalize__ (self , method = "stack" )
7912
7912
7913
- def explode (self , column : str | tuple , ignore_index : bool = False ) -> DataFrame :
7913
+ def explode (
7914
+ self ,
7915
+ column : str | tuple | list [str | tuple ],
7916
+ ignore_index : bool = False ,
7917
+ ) -> DataFrame :
7914
7918
"""
7915
7919
Transform each element of a list-like to a row, replicating index values.
7916
7920
7917
7921
.. versionadded:: 0.25.0
7918
7922
7919
7923
Parameters
7920
7924
----------
7921
- column : str or tuple
7922
- Column to explode.
7925
+ column : str or tuple or list thereof
7926
+ Column(s) to explode.
7927
+ For multiple columns, specify a non-empty list in which each element
7928
+ is a str or tuple; the list-like data of all specified columns
7929
+ on the same row of the frame must have matching length.
7923
7930
ignore_index : bool, default False
7924
7931
If True, the resulting index will be labeled 0, 1, …, n - 1.
7925
7932
@@ -7934,7 +7941,10 @@ def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:
7934
7941
Raises
7935
7942
------
7936
7943
ValueError :
7937
- if columns of the frame are not unique.
7944
+ * If columns of the frame are not unique.
7945
+ * If the list of columns to explode is empty.
7946
+ * If the specified columns to explode do not have matching counts of
7947
+ elements row-wise in the frame.
7938
7948
7939
7949
See Also
7940
7950
--------
@@ -7953,32 +7963,67 @@ def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:
7953
7963
7954
7964
Examples
7955
7965
--------
7956
- >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
7966
+ >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
7967
+ ... 'B': 1,
7968
+ ... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
7957
7969
>>> df
7958
- A B
7959
- 0 [1, 2, 3 ] 1
7960
- 1 foo 1
7961
- 2 [] 1
7962
- 3 [3, 4] 1
7970
+ A B C
7971
+ 0 [0, 1, 2 ] 1 [a, b, c]
7972
+ 1 foo 1 NaN
7973
+ 2 [] 1 []
7974
+ 3 [3, 4] 1 [d, e]
7963
7975
7964
7976
>>> df.explode('A')
7965
- A B
7966
- 0 1 1
7967
- 0 2 1
7968
- 0 3 1
7969
- 1 foo 1
7970
- 2 NaN 1
7971
- 3 3 1
7972
- 3 4 1
7973
- """
7974
- if not (is_scalar (column ) or isinstance (column , tuple )):
7975
- raise ValueError ("column must be a scalar" )
7977
+ A B C
7978
+ 0 0 1 [a, b, c]
7979
+ 0 1 1 [a, b, c]
7980
+ 0 2 1 [a, b, c]
7981
+ 1 foo 1 NaN
7982
+ 2 NaN 1 []
7983
+ 3 3 1 [d, e]
7984
+ 3 4 1 [d, e]
7985
+
7986
+ >>> df.explode(list('AC'))
7987
+ A B C
7988
+ 0 0 1 a
7989
+ 0 1 1 b
7990
+ 0 2 1 c
7991
+ 1 foo 1 NaN
7992
+ 2 NaN 1 NaN
7993
+ 3 3 1 d
7994
+ 3 4 1 e
7995
+ """
7976
7996
if not self .columns .is_unique :
7977
7997
raise ValueError ("columns must be unique" )
7978
7998
7999
+ columns : list [str | tuple ]
8000
+ if is_scalar (column ) or isinstance (column , tuple ):
8001
+ # mypy: List item 0 has incompatible type "Union[str, Tuple[Any, ...],
8002
+ # List[Union[str, Tuple[Any, ...]]]]"; expected
8003
+ # "Union[str, Tuple[Any, ...]]"
8004
+ columns = [column ] # type: ignore[list-item]
8005
+ elif isinstance (column , list ) and all (
8006
+ map (lambda c : is_scalar (c ) or isinstance (c , tuple ), column )
8007
+ ):
8008
+ if len (column ) == 0 :
8009
+ raise ValueError ("column must be nonempty" )
8010
+ if len (column ) > len (set (column )):
8011
+ raise ValueError ("column must be unique" )
8012
+ columns = column
8013
+ else :
8014
+ raise ValueError ("column must be a scalar, tuple, or list thereof" )
8015
+
7979
8016
df = self .reset_index (drop = True )
7980
- result = df [column ].explode ()
7981
- result = df .drop ([column ], axis = 1 ).join (result )
8017
+ if len (columns ) == 1 :
8018
+ result = df [column ].explode ()
8019
+ else :
8020
+ mylen = lambda x : len (x ) if is_list_like (x ) else - 1
8021
+ counts0 = self [columns [0 ]].apply (mylen )
8022
+ for c in columns [1 :]:
8023
+ if not all (counts0 == self [c ].apply (mylen )):
8024
+ raise ValueError ("columns must have matching element counts" )
8025
+ result = DataFrame ({c : df [c ].explode () for c in columns })
8026
+ result = df .drop (columns , axis = 1 ).join (result )
7982
8027
if ignore_index :
7983
8028
result .index = ibase .default_index (len (result ))
7984
8029
else :
0 commit comments