@@ -7925,7 +7925,9 @@ def stack(self, level: Level = -1, dropna: bool = True):
7925
7925
return result .__finalize__ (self , method = "stack" )
7926
7926
7927
7927
def explode (
7928
- self , column : Union [str , Tuple ], ignore_index : bool = False
7928
+ self ,
7929
+ column : Union [str , Tuple , List [Union [str , Tuple ]]],
7930
+ ignore_index : bool = False
7929
7931
) -> DataFrame :
7930
7932
"""
7931
7933
Transform each element of a list-like to a row, replicating index values.
@@ -7934,8 +7936,8 @@ def explode(
7934
7936
7935
7937
Parameters
7936
7938
----------
7937
- column : str or tuple
7938
- Column to explode.
7939
+ column : str or tuple or list thereof
7940
+ Column(s) to explode.
7939
7941
ignore_index : bool, default False
7940
7942
If True, the resulting index will be labeled 0, 1, …, n - 1.
7941
7943
@@ -7969,32 +7971,61 @@ def explode(
7969
7971
7970
7972
Examples
7971
7973
--------
7972
- >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
7974
+ >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
7975
+ ... 'B': 1,
7976
+ ... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
7973
7977
>>> df
7974
- A B
7975
- 0 [1, 2, 3] 1
7976
- 1 foo 1
7977
- 2 [] 1
7978
- 3 [3, 4] 1
7978
+ A B C
7979
+ 0 [0, 1, 2] 1 [a, b, c]
7980
+ 1 foo 1 NaN
7981
+ 2 [] 1 []
7982
+ 3 [3, 4] 1 [d, e]
7979
7983
7980
7984
>>> df.explode('A')
7981
- A B
7982
- 0 1 1
7983
- 0 2 1
7984
- 0 3 1
7985
- 1 foo 1
7986
- 2 NaN 1
7987
- 3 3 1
7988
- 3 4 1
7989
- """
7990
- if not (is_scalar (column ) or isinstance (column , tuple )):
7991
- raise ValueError ("column must be a scalar" )
7985
+ A B C
7986
+ 0 0 1 [a, b, c]
7987
+ 0 1 1 [a, b, c]
7988
+ 0 2 1 [a, b, c]
7989
+ 1 foo 1 NaN
7990
+ 2 NaN 1 []
7991
+ 3 3 1 [d, e]
7992
+ 3 4 1 [d, e]
7993
+
7994
+ >>> df.explode(list('AC'))
7995
+ A B C
7996
+ 0 0 1 a
7997
+ 0 1 1 b
7998
+ 0 2 1 c
7999
+ 1 foo 1 NaN
8000
+ 2 NaN 1 NaN
8001
+ 3 3 1 d
8002
+ 3 4 1 e
8003
+ """
7992
8004
if not self .columns .is_unique :
7993
8005
raise ValueError ("columns must be unique" )
8006
+ if (is_scalar (column ) or isinstance (column , tuple )):
8007
+ columns = [column ]
8008
+ elif (isinstance (column , list ) and
8009
+ all (map (lambda c : is_scalar (c ) or isinstance (c , tuple ),
8010
+ column ))):
8011
+ if len (column ) > len (set (column )):
8012
+ raise ValueError ("column must be unique" )
8013
+ # mypy: Incompatible types in assignment (expression has type
8014
+ # "List[Union[str, Tuple[Any, ...]]]", variable has type
8015
+ # "List[Union[str, Tuple[Any, ...], List[Union[str, Tuple[Any, ...]]]]]")
8016
+ columns = column # type: ignore[assignment]
8017
+ else :
8018
+ raise ValueError ("column must be a scalar, tuple, or list thereof" )
8019
+
8020
+ mylen = lambda x : len (x ) if is_list_like (x ) else - 1
8021
+ counts0 = self [columns [0 ]].apply (mylen )
8022
+ for c in columns [1 :]:
8023
+ if not all (counts0 == self [c ].apply (mylen )):
8024
+ raise ValueError ("columns must have matching element counts" )
7994
8025
7995
8026
df = self .reset_index (drop = True )
7996
- result = df [column ].explode ()
7997
- result = df .drop ([ column ] , axis = 1 ).join (result )
8027
+ result = DataFrame ({ c : df [c ].explode () for c in columns } )
8028
+ result = df .drop (columns , axis = 1 ).join (result )
7998
8029
if ignore_index :
7999
8030
result .index = ibase .default_index (len (result ))
8000
8031
else :
0 commit comments