@@ -8168,16 +8168,23 @@ def stack(self, level: Level = -1, dropna: bool = True):
8168
8168
8169
8169
return result .__finalize__ (self , method = "stack" )
8170
8170
8171
- def explode (self , column : str | tuple , ignore_index : bool = False ) -> DataFrame :
8171
+ def explode (
8172
+ self ,
8173
+ column : str | tuple | list [str | tuple ],
8174
+ ignore_index : bool = False ,
8175
+ ) -> DataFrame :
8172
8176
"""
8173
8177
Transform each element of a list-like to a row, replicating index values.
8174
8178
8175
8179
.. versionadded:: 0.25.0
8176
8180
8177
8181
Parameters
8178
8182
----------
8179
- column : str or tuple
8180
- Column to explode.
8183
+ column : str or tuple or list thereof
8184
+ Column(s) to explode.
8185
+ For multiple columns, specify a non-empty list in which each element
8186
+ is a str or tuple; the list-like data of all specified columns
8187
+ on the same row of the frame must have matching length.
8181
8188
ignore_index : bool, default False
8182
8189
If True, the resulting index will be labeled 0, 1, …, n - 1.
8183
8190
@@ -8192,7 +8199,10 @@ def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:
8192
8199
Raises
8193
8200
------
8194
8201
ValueError :
8195
- if columns of the frame are not unique.
8202
+ * If columns of the frame are not unique.
8203
+ * If the specified list of columns to explode is empty.
8204
+ * If the specified columns to explode do not have a matching count of
8205
+ elements row-wise in the frame.
8196
8206
8197
8207
See Also
8198
8208
--------
@@ -8211,32 +8221,67 @@ def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:
8211
8221
8212
8222
Examples
8213
8223
--------
8214
- >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
8224
+ >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
8225
+ ... 'B': 1,
8226
+ ... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
8215
8227
>>> df
8216
- A B
8217
- 0 [1, 2, 3 ] 1
8218
- 1 foo 1
8219
- 2 [] 1
8220
- 3 [3, 4] 1
8228
+ A B C
8229
+ 0 [0, 1, 2 ] 1 [a, b, c]
8230
+ 1 foo 1 NaN
8231
+ 2 [] 1 []
8232
+ 3 [3, 4] 1 [d, e]
8221
8233
8222
8234
>>> df.explode('A')
8223
- A B
8224
- 0 1 1
8225
- 0 2 1
8226
- 0 3 1
8227
- 1 foo 1
8228
- 2 NaN 1
8229
- 3 3 1
8230
- 3 4 1
8231
- """
8232
- if not (is_scalar (column ) or isinstance (column , tuple )):
8233
- raise ValueError ("column must be a scalar" )
8235
+ A B C
8236
+ 0 0 1 [a, b, c]
8237
+ 0 1 1 [a, b, c]
8238
+ 0 2 1 [a, b, c]
8239
+ 1 foo 1 NaN
8240
+ 2 NaN 1 []
8241
+ 3 3 1 [d, e]
8242
+ 3 4 1 [d, e]
8243
+
8244
+ >>> df.explode(list('AC'))
8245
+ A B C
8246
+ 0 0 1 a
8247
+ 0 1 1 b
8248
+ 0 2 1 c
8249
+ 1 foo 1 NaN
8250
+ 2 NaN 1 NaN
8251
+ 3 3 1 d
8252
+ 3 4 1 e
8253
+ """
8234
8254
if not self .columns .is_unique :
8235
8255
raise ValueError ("columns must be unique" )
8236
8256
8257
+ columns : list [str | tuple ]
8258
+ if is_scalar (column ) or isinstance (column , tuple ):
8259
+ # mypy: List item 0 has incompatible type "Union[str, Tuple[Any, ...],
8260
+ # List[Union[str, Tuple[Any, ...]]]]"; expected
8261
+ # "Union[str, Tuple[Any, ...]]"
8262
+ columns = [column ] # type: ignore[list-item]
8263
+ elif isinstance (column , list ) and all (
8264
+ map (lambda c : is_scalar (c ) or isinstance (c , tuple ), column )
8265
+ ):
8266
+ if len (column ) == 0 :
8267
+ raise ValueError ("column must be nonempty" )
8268
+ if len (column ) > len (set (column )):
8269
+ raise ValueError ("column must be unique" )
8270
+ columns = column
8271
+ else :
8272
+ raise ValueError ("column must be a scalar, tuple, or list thereof" )
8273
+
8237
8274
df = self .reset_index (drop = True )
8238
- result = df [column ].explode ()
8239
- result = df .drop ([column ], axis = 1 ).join (result )
8275
+ if len (columns ) == 1 :
8276
+ result = df [column ].explode ()
8277
+ else :
8278
+ mylen = lambda x : len (x ) if is_list_like (x ) else - 1
8279
+ counts0 = self [columns [0 ]].apply (mylen )
8280
+ for c in columns [1 :]:
8281
+ if not all (counts0 == self [c ].apply (mylen )):
8282
+ raise ValueError ("columns must have matching element counts" )
8283
+ result = DataFrame ({c : df [c ].explode () for c in columns })
8284
+ result = df .drop (columns , axis = 1 ).join (result )
8240
8285
if ignore_index :
8241
8286
result .index = ibase .default_index (len (result ))
8242
8287
else :
0 commit comments