|
1 | 1 | from datetime import datetime, timedelta
|
| 2 | +from io import StringIO |
2 | 3 |
|
3 | 4 | import numpy as np
|
4 | 5 | import pytest
|
5 | 6 |
|
| 7 | +from pandas.compat import PYPY |
| 8 | + |
6 | 9 | import pandas as pd
|
7 | 10 | from pandas import (
|
8 | 11 | Categorical,
|
@@ -488,22 +491,117 @@ def test_categorical_series_repr_timedelta_ordered(self):
|
488 | 491 |
|
489 | 492 | assert repr(s) == exp
|
490 | 493 |
|
@pytest.mark.parametrize("verbose", [True, False])
def test_info(self, verbose, capsys):
    """Check the text printed by ``Series.info`` for both ``verbose`` settings.

    The class/index header and the dtypes/memory footer are expected in
    every case; the "Series name" table is expected only when ``verbose``
    is true.
    """
    mi = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    s = Series(range(len(mi)), index=mi, name="sth")
    s.info(verbose=verbose)

    header = (
        "<class 'pandas.core.series.Series'>\n"
        "MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')\n"
    )
    # NOTE(review): the column spacing in the three table rows below could
    # not be confirmed from the reviewed diff (whitespace runs appear
    # collapsed) -- verify against the actual ``Series.info`` output.
    table = (
        "Series name: sth\n"
        "# Non-Null Count Dtype\n"
        "--- -------------- -----\n"
        "0 10 non-null int64\n"
    )
    # The memory figure is taken from the Series itself rather than hard-coded.
    footer = f"dtypes: int64(1)\nmemory usage: {s.memory_usage()}.0+ bytes\n"
    expected = header + (table if verbose else "") + footer

    printed = capsys.readouterr().out
    assert printed == expected
|
| 517 | + |
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy(self):
    """Assert that ``deep=True`` reports more memory than the default.

    Skipped when ``PYPY`` is true.
    """
    # Series built with a string label in its index.
    indexed = pd.Series({"a": [1]}, index=["foo"])
    deep_with_index = indexed.memory_usage(index=True, deep=True)
    shallow_with_index = indexed.memory_usage(index=True)
    assert deep_with_index > shallow_with_index

    # Series built from a dict whose value is a list of strings.
    obj = pd.Series({"a": ["a"]})
    deep_total = obj.memory_usage(deep=True)
    shallow_total = obj.memory_usage()
    assert deep_total > shallow_total
| 527 | + |
@pytest.mark.skipif(
    not PYPY, reason="PyPy-only: deep=True doesn't change result on PyPy"
)
def test_info_memory_usage_deep_pypy(self):
    """Assert that ``deep=True`` reports the same memory as the default.

    Runs only when ``PYPY`` is true; on any other interpreter the test is
    skipped, and the skip reason says so explicitly.
    """
    # Series built with a string label in its index.
    s_with_object_index = pd.Series({"a": [1]}, index=["foo"])
    assert s_with_object_index.memory_usage(
        index=True, deep=True
    ) == s_with_object_index.memory_usage(index=True)

    # Series built from a dict whose value is a list of strings.
    s_object = pd.Series({"a": ["a"]})
    assert s_object.memory_usage(deep=True) == s_object.memory_usage()
| 537 | + |
| 538 | + def test_info_memory_usage_qualified(self): |
| 539 | + |
| 540 | + buf = StringIO() |
| 541 | + s = pd.Series(1, index=[1, 2, 3]) |
| 542 | + s.info(buf=buf) |
| 543 | + assert "+" not in buf.getvalue() |
| 544 | + |
| 545 | + buf = StringIO() |
| 546 | + s = pd.Series(1, index=list("ABC")) |
| 547 | + s.info(buf=buf) |
| 548 | + assert "+" in buf.getvalue() |
| 549 | + |
| 550 | + buf = StringIO() |
| 551 | + s = Series(1, index=pd.MultiIndex.from_product([range(3), range(3)]),) |
| 552 | + s.info(buf=buf) |
| 553 | + assert "+" not in buf.getvalue() |
| 554 | + |
| 555 | + buf = StringIO() |
| 556 | + s = Series(1, index=pd.MultiIndex.from_product([range(3), ["foo", "bar"]]),) |
| 557 | + s.info(buf=buf) |
| 558 | + assert "+" in buf.getvalue() |
| 559 | + |
| 560 | + def test_info_memory_usage_bug_on_multiindex(self): |
| 561 | + # GH 14308 |
| 562 | + # memory usage introspection should not materialize .values |
| 563 | + |
| 564 | + from string import ascii_uppercase as uppercase |
| 565 | + |
| 566 | + def memory_usage(f): |
| 567 | + return f.memory_usage(deep=True) |
| 568 | + |
| 569 | + N = 100 |
| 570 | + M = len(uppercase) |
| 571 | + index = pd.MultiIndex.from_product( |
| 572 | + [list(uppercase), pd.date_range("20160101", periods=N)], |
| 573 | + names=["id", "date"], |
| 574 | + ) |
| 575 | + s = Series(np.random.randn(N * M), index=index) |
| 576 | + |
| 577 | + unstacked = s.unstack("id") |
| 578 | + assert s.values.nbytes == unstacked.values.nbytes |
| 579 | + assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum() |
| 580 | + |
| 581 | + # high upper bound |
| 582 | + assert ( |
| 583 | + unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True) < 2000 |
| 584 | + ) |
| 585 | + |
| 586 | + def test_info_categorical(self): |
| 587 | + # GH14298 |
| 588 | + idx = pd.CategoricalIndex(["a", "b"]) |
| 589 | + s = pd.Series(np.zeros((2)), index=idx) |
| 590 | + |
| 591 | + buf = StringIO() |
| 592 | + s.info(buf=buf) |
| 593 | + |
| 594 | + def test_info_categorical_column(self): |
| 595 | + |
| 596 | + # make sure it works |
| 597 | + n = 2500 |
| 598 | + s = Series( |
| 599 | + np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) |
| 600 | + ).astype("category") |
| 601 | + s.isna() |
| 602 | + buf = StringIO() |
| 603 | + s.info(buf=buf) |
| 604 | + |
| 605 | + s2 = s[s == "d"] |
| 606 | + buf = StringIO() |
| 607 | + s2.info(buf=buf) |
0 commit comments