|
6 | 6 | from pandas.errors import PerformanceWarning
|
7 | 7 |
|
8 | 8 | import pandas as pd
|
9 |
| -from pandas import DataFrame, Index, MultiIndex |
| 9 | +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp |
10 | 10 | import pandas._testing as tm
|
11 | 11 |
|
12 | 12 |
|
@@ -258,3 +258,162 @@ def test_drop_non_empty_list(self, index, drop_labels):
|
258 | 258 | # GH# 21494
|
259 | 259 | with pytest.raises(KeyError, match="not found in axis"):
|
260 | 260 | pd.DataFrame(index=index).drop(drop_labels)
|
| 261 | + |
| 262 | + def test_mixed_depth_drop(self): |
| 263 | + arrays = [ |
| 264 | + ["a", "top", "top", "routine1", "routine1", "routine2"], |
| 265 | + ["", "OD", "OD", "result1", "result2", "result1"], |
| 266 | + ["", "wx", "wy", "", "", ""], |
| 267 | + ] |
| 268 | + |
| 269 | + tuples = sorted(zip(*arrays)) |
| 270 | + index = MultiIndex.from_tuples(tuples) |
| 271 | + df = DataFrame(np.random.randn(4, 6), columns=index) |
| 272 | + |
| 273 | + result = df.drop("a", axis=1) |
| 274 | + expected = df.drop([("a", "", "")], axis=1) |
| 275 | + tm.assert_frame_equal(expected, result) |
| 276 | + |
| 277 | + result = df.drop(["top"], axis=1) |
| 278 | + expected = df.drop([("top", "OD", "wx")], axis=1) |
| 279 | + expected = expected.drop([("top", "OD", "wy")], axis=1) |
| 280 | + tm.assert_frame_equal(expected, result) |
| 281 | + |
| 282 | + result = df.drop(("top", "OD", "wx"), axis=1) |
| 283 | + expected = df.drop([("top", "OD", "wx")], axis=1) |
| 284 | + tm.assert_frame_equal(expected, result) |
| 285 | + |
| 286 | + expected = df.drop([("top", "OD", "wy")], axis=1) |
| 287 | + expected = df.drop("top", axis=1) |
| 288 | + |
| 289 | + result = df.drop("result1", level=1, axis=1) |
| 290 | + expected = df.drop( |
| 291 | + [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1 |
| 292 | + ) |
| 293 | + tm.assert_frame_equal(expected, result) |
| 294 | + |
| 295 | + def test_drop_multiindex_other_level_nan(self): |
| 296 | + # GH#12754 |
| 297 | + df = ( |
| 298 | + DataFrame( |
| 299 | + { |
| 300 | + "A": ["one", "one", "two", "two"], |
| 301 | + "B": [np.nan, 0.0, 1.0, 2.0], |
| 302 | + "C": ["a", "b", "c", "c"], |
| 303 | + "D": [1, 2, 3, 4], |
| 304 | + } |
| 305 | + ) |
| 306 | + .set_index(["A", "B", "C"]) |
| 307 | + .sort_index() |
| 308 | + ) |
| 309 | + result = df.drop("c", level="C") |
| 310 | + expected = DataFrame( |
| 311 | + [2, 1], |
| 312 | + columns=["D"], |
| 313 | + index=pd.MultiIndex.from_tuples( |
| 314 | + [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"] |
| 315 | + ), |
| 316 | + ) |
| 317 | + tm.assert_frame_equal(result, expected) |
| 318 | + |
| 319 | + def test_drop_nonunique(self): |
| 320 | + df = DataFrame( |
| 321 | + [ |
| 322 | + ["x-a", "x", "a", 1.5], |
| 323 | + ["x-a", "x", "a", 1.2], |
| 324 | + ["z-c", "z", "c", 3.1], |
| 325 | + ["x-a", "x", "a", 4.1], |
| 326 | + ["x-b", "x", "b", 5.1], |
| 327 | + ["x-b", "x", "b", 4.1], |
| 328 | + ["x-b", "x", "b", 2.2], |
| 329 | + ["y-a", "y", "a", 1.2], |
| 330 | + ["z-b", "z", "b", 2.1], |
| 331 | + ], |
| 332 | + columns=["var1", "var2", "var3", "var4"], |
| 333 | + ) |
| 334 | + |
| 335 | + grp_size = df.groupby("var1").size() |
| 336 | + drop_idx = grp_size.loc[grp_size == 1] |
| 337 | + |
| 338 | + idf = df.set_index(["var1", "var2", "var3"]) |
| 339 | + |
| 340 | + # it works! GH#2101 |
| 341 | + result = idf.drop(drop_idx.index, level=0).reset_index() |
| 342 | + expected = df[-df.var1.isin(drop_idx.index)] |
| 343 | + |
| 344 | + result.index = expected.index |
| 345 | + |
| 346 | + tm.assert_frame_equal(result, expected) |
| 347 | + |
| 348 | + def test_drop_level(self): |
| 349 | + index = MultiIndex( |
| 350 | + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], |
| 351 | + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], |
| 352 | + names=["first", "second"], |
| 353 | + ) |
| 354 | + frame = DataFrame( |
| 355 | + np.random.randn(10, 3), |
| 356 | + index=index, |
| 357 | + columns=Index(["A", "B", "C"], name="exp"), |
| 358 | + ) |
| 359 | + |
| 360 | + result = frame.drop(["bar", "qux"], level="first") |
| 361 | + expected = frame.iloc[[0, 1, 2, 5, 6]] |
| 362 | + tm.assert_frame_equal(result, expected) |
| 363 | + |
| 364 | + result = frame.drop(["two"], level="second") |
| 365 | + expected = frame.iloc[[0, 2, 3, 6, 7, 9]] |
| 366 | + tm.assert_frame_equal(result, expected) |
| 367 | + |
| 368 | + result = frame.T.drop(["bar", "qux"], axis=1, level="first") |
| 369 | + expected = frame.iloc[[0, 1, 2, 5, 6]].T |
| 370 | + tm.assert_frame_equal(result, expected) |
| 371 | + |
| 372 | + result = frame.T.drop(["two"], axis=1, level="second") |
| 373 | + expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T |
| 374 | + tm.assert_frame_equal(result, expected) |
| 375 | + |
| 376 | + def test_drop_level_nonunique_datetime(self): |
| 377 | + # GH#12701 |
| 378 | + idx = Index([2, 3, 4, 4, 5], name="id") |
| 379 | + idxdt = pd.to_datetime( |
| 380 | + [ |
| 381 | + "201603231400", |
| 382 | + "201603231500", |
| 383 | + "201603231600", |
| 384 | + "201603231600", |
| 385 | + "201603231700", |
| 386 | + ] |
| 387 | + ) |
| 388 | + df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx) |
| 389 | + df["tstamp"] = idxdt |
| 390 | + df = df.set_index("tstamp", append=True) |
| 391 | + ts = Timestamp("201603231600") |
| 392 | + assert df.index.is_unique is False |
| 393 | + |
| 394 | + result = df.drop(ts, level="tstamp") |
| 395 | + expected = df.loc[idx != 4] |
| 396 | + tm.assert_frame_equal(result, expected) |
| 397 | + |
@pytest.mark.parametrize("box", [Series, DataFrame])
def test_drop_tz_aware_timestamp_across_dst(self, box):
    """Drop the first tz-aware label from a 15-minute Europe/Berlin index.

    Runs once for each container type supplied through ``box``.
    """
    # GH#21761
    tz = "Europe/Berlin"
    first = Timestamp("2017-10-29", tz=tz)
    last = Timestamp("2017-10-29 04:00:00", tz=tz)
    full_idx = pd.date_range(first, last, freq="15min")
    obj = box(data=[1] * len(full_idx), index=full_idx)

    result = obj.drop(first)

    # Expected: the same range, starting one step after the dropped label.
    second = Timestamp("2017-10-29 00:15:00", tz=tz)
    remaining_idx = pd.date_range(second, last, freq="15min")
    expected = box(data=[1] * len(remaining_idx), index=remaining_idx)
    tm.assert_equal(result, expected)
| 410 | + |
| 411 | + def test_drop_preserve_names(self): |
| 412 | + index = MultiIndex.from_arrays( |
| 413 | + [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] |
| 414 | + ) |
| 415 | + |
| 416 | + df = DataFrame(np.random.randn(6, 3), index=index) |
| 417 | + |
| 418 | + result = df.drop([(0, 2)]) |
| 419 | + assert result.index.names == ("one", "two") |
0 commit comments