diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 71903d10a6983..5becbf0a87472 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -217,6 +217,7 @@ Other enhancements
- Added "Juneteenth National Independence Day" to
``USFederalHolidayCalendar``. See also `Other API changes`_.
- :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, :meth:`.Expanding.std` now support `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`44461`)
+- :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`)
.. ---------------------------------------------------------------------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8c85c4e961d99..8ebdb3b890d77 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -206,6 +206,7 @@
format as fmt,
)
from pandas.io.formats.info import (
+ INFO_DOCSTRING,
DataFrameInfo,
frame_sub_kwargs,
)
@@ -3138,7 +3139,7 @@ def to_xml(
return xml_formatter.write_output()
# ----------------------------------------------------------------------
- @doc(DataFrameInfo.render, **frame_sub_kwargs)
+ @doc(INFO_DOCSTRING, **frame_sub_kwargs)
def info(
self,
verbose: bool | None = None,
diff --git a/pandas/core/series.py b/pandas/core/series.py
index ffa31b4f66211..b3133ee1275a1 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -139,6 +139,11 @@
from pandas.core.tools.datetimes import to_datetime
import pandas.io.formats.format as fmt
+from pandas.io.formats.info import (
+ INFO_DOCSTRING,
+ SeriesInfo,
+ series_sub_kwargs,
+)
import pandas.plotting
if TYPE_CHECKING:
@@ -4914,6 +4919,22 @@ def replace(
method=method,
)
+ @doc(INFO_DOCSTRING, **series_sub_kwargs)
+ def info(
+ self,
+ verbose: bool | None = None,
+ buf: IO[str] | None = None,
+ max_cols: int | None = None,
+ memory_usage: bool | str | None = None,
+ show_counts: bool = True,
+ ) -> None:
+ return SeriesInfo(self, memory_usage).render(
+ buf=buf,
+ max_cols=max_cols,
+ verbose=verbose,
+ show_counts=show_counts,
+ )
+
def _replace_single(self, to_replace, method: str, inplace: bool, limit):
"""
Replaces values in a Series using the fill method specified when no
diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index 9340d020cd6ce..4a9310c6dccf8 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -20,7 +20,6 @@
Dtype,
WriteBuffer,
)
-from pandas.util._decorators import doc
from pandas.core.indexes.api import Index
@@ -51,7 +50,11 @@
only if the DataFrame is smaller than
``pandas.options.display.max_info_rows`` and
``pandas.options.display.max_info_columns``. A value of True always
- shows the counts, and False never shows the counts.
+ shows the counts, and False never shows the counts."""
+)
+
+null_counts_sub = dedent(
+ """
null_counts : bool, optional
.. deprecated:: 1.2.0
Use show_counts instead."""
@@ -157,12 +160,94 @@
"type_sub": " and columns",
"max_cols_sub": frame_max_cols_sub,
"show_counts_sub": show_counts_sub,
+ "null_counts_sub": null_counts_sub,
"examples_sub": frame_examples_sub,
"see_also_sub": frame_see_also_sub,
"version_added_sub": "",
}
+series_examples_sub = dedent(
+ """\
+ >>> int_values = [1, 2, 3, 4, 5]
+ >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
+ >>> s = pd.Series(text_values, index=int_values)
+ >>> s.info()
+ <class 'pandas.core.series.Series'>
+ Int64Index: 5 entries, 1 to 5
+ Series name: None
+ Non-Null Count Dtype
+ -------------- -----
+ 5 non-null object
+ dtypes: object(1)
+ memory usage: 80.0+ bytes
+
+ Prints a summary excluding information about its values:
+
+ >>> s.info(verbose=False)
+ <class 'pandas.core.series.Series'>
+ Int64Index: 5 entries, 1 to 5
+ dtypes: object(1)
+ memory usage: 80.0+ bytes
+
+ Pipe output of Series.info to a buffer instead of sys.stdout, get the
+ buffer content and write it to a text file:
+
+ >>> import io
+ >>> buffer = io.StringIO()
+ >>> s.info(buf=buffer)
+ >>> s = buffer.getvalue()
+ >>> with open("df_info.txt", "w",
+ ... encoding="utf-8") as f: # doctest: +SKIP
+ ... f.write(s)
+ 260
+
+ The `memory_usage` parameter allows deep introspection mode, especially
+ useful for big Series and for fine-tuning memory optimization:
+
+ >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
+ >>> s = pd.Series(random_strings_array)
+ >>> s.info()
+ <class 'pandas.core.series.Series'>
+ RangeIndex: 1000000 entries, 0 to 999999
+ Series name: None
+ Non-Null Count Dtype
+ -------------- -----
+ 1000000 non-null object
+ dtypes: object(1)
+ memory usage: 7.6+ MB
+
+ >>> s.info(memory_usage='deep')
+ <class 'pandas.core.series.Series'>
+ RangeIndex: 1000000 entries, 0 to 999999
+ Series name: None
+ Non-Null Count Dtype
+ -------------- -----
+ 1000000 non-null object
+ dtypes: object(1)
+ memory usage: 55.3 MB"""
+)
+
+
+series_see_also_sub = dedent(
+ """\
+ Series.describe: Generate descriptive statistics of Series.
+ Series.memory_usage: Memory usage of Series."""
+)
+
+
+series_sub_kwargs = {
+ "klass": "Series",
+ "type_sub": "",
+ "max_cols_sub": "",
+ "show_counts_sub": show_counts_sub,
+ "null_counts_sub": "",
+ "examples_sub": series_examples_sub,
+ "see_also_sub": series_see_also_sub,
+ "version_added_sub": "\n.. versionadded:: 1.4.0\n",
+}
+
+
INFO_DOCSTRING = dedent(
"""
Print a concise summary of a {klass}.
@@ -181,7 +266,7 @@
buf : writable buffer, defaults to sys.stdout
Where to send the output. By default, the output is printed to
sys.stdout. Pass a writable buffer if you need to further process
- the output.
+ the output.\
{max_cols_sub}
memory_usage : bool, str, optional
Specifies whether total memory usage of the {klass}
@@ -196,7 +281,7 @@
consume the same memory amount for corresponding dtypes. With deep
memory introspection, a real memory usage calculation is performed
at the cost of computational resources.
- {show_counts_sub}
+ {show_counts_sub}{null_counts_sub}
Returns
-------
@@ -422,16 +507,6 @@ def memory_usage_bytes(self) -> int:
deep = False
return self.data.memory_usage(index=True, deep=deep).sum()
- @doc(
- INFO_DOCSTRING,
- klass="DataFrame",
- type_sub=" and columns",
- max_cols_sub=frame_max_cols_sub,
- show_counts_sub=show_counts_sub,
- examples_sub=frame_examples_sub,
- see_also_sub=frame_see_also_sub,
- version_added_sub="",
- )
def render(
self,
*,
@@ -449,6 +524,69 @@ def render(
printer.to_buffer(buf)
+class SeriesInfo(BaseInfo):
+ """
+ Class storing series-specific info.
+ """
+
+ def __init__(
+ self,
+ data: Series,
+ memory_usage: bool | str | None = None,
+ ):
+ self.data: Series = data
+ self.memory_usage = _initialize_memory_usage(memory_usage)
+
+ def render(
+ self,
+ *,
+ buf: WriteBuffer[str] | None = None,
+ max_cols: int | None = None,
+ verbose: bool | None = None,
+ show_counts: bool | None = None,
+ ) -> None:
+ if max_cols is not None:
+ raise ValueError(
+ "Argument `max_cols` can only be passed "
+ "in DataFrame.info, not Series.info"
+ )
+ printer = SeriesInfoPrinter(
+ info=self,
+ verbose=verbose,
+ show_counts=show_counts,
+ )
+ printer.to_buffer(buf)
+
+ @property
+ def non_null_counts(self) -> Sequence[int]:
+ return [self.data.count()]
+
+ @property
+ def dtypes(self) -> Iterable[Dtype]:
+ return [self.data.dtypes]
+
+ @property
+ def dtype_counts(self) -> Mapping[str, int]:
+ from pandas.core.frame import DataFrame
+
+ return _get_dataframe_dtype_counts(DataFrame(self.data))
+
+ @property
+ def memory_usage_bytes(self) -> int:
+ """Memory usage in bytes.
+
+ Returns
+ -------
+ memory_usage_bytes : int
+ Object's total memory usage in bytes.
+ """
+ if self.memory_usage == "deep":
+ deep = True
+ else:
+ deep = False
+ return self.data.memory_usage(index=True, deep=deep)
+
+
class InfoPrinterAbstract:
"""
Class for printing dataframe or series info.
@@ -548,6 +686,49 @@ def _create_table_builder(self) -> DataFrameTableBuilder:
)
+class SeriesInfoPrinter(InfoPrinterAbstract):
+ """Class for printing series info.
+
+ Parameters
+ ----------
+ info : SeriesInfo
+ Instance of SeriesInfo.
+ verbose : bool, optional
+ Whether to print the full summary.
+ show_counts : bool, optional
+ Whether to show the non-null counts.
+ """
+
+ def __init__(
+ self,
+ info: SeriesInfo,
+ verbose: bool | None = None,
+ show_counts: bool | None = None,
+ ):
+ self.info = info
+ self.data = info.data
+ self.verbose = verbose
+ self.show_counts = self._initialize_show_counts(show_counts)
+
+ def _create_table_builder(self) -> SeriesTableBuilder:
+ """
+ Create instance of table builder based on verbosity.
+ """
+ if self.verbose or self.verbose is None:
+ return SeriesTableBuilderVerbose(
+ info=self.info,
+ with_counts=self.show_counts,
+ )
+ else:
+ return SeriesTableBuilderNonVerbose(info=self.info)
+
+ def _initialize_show_counts(self, show_counts: bool | None) -> bool:
+ if show_counts is None:
+ return True
+ else:
+ return show_counts
+
+
class TableBuilderAbstract(ABC):
"""
Abstract builder for info table.
@@ -832,6 +1013,102 @@ def _gen_columns(self) -> Iterator[str]:
yield pprint_thing(col)
+class SeriesTableBuilder(TableBuilderAbstract):
+ """
+ Abstract builder for series info table.
+
+ Parameters
+ ----------
+ info : SeriesInfo
+ Instance of SeriesInfo.
+ """
+
+ def __init__(self, *, info: SeriesInfo):
+ self.info: SeriesInfo = info
+
+ def get_lines(self) -> list[str]:
+ self._lines = []
+ self._fill_non_empty_info()
+ return self._lines
+
+ @property
+ def data(self) -> Series:
+ """Series."""
+ return self.info.data
+
+ def add_memory_usage_line(self) -> None:
+ """Add line containing memory usage."""
+ self._lines.append(f"memory usage: {self.memory_usage_string}")
+
+ @abstractmethod
+ def _fill_non_empty_info(self) -> None:
+ """Add lines to the info table, pertaining to non-empty series."""
+
+
+class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
+ """
+ Series info table builder for non-verbose output.
+ """
+
+ def _fill_non_empty_info(self) -> None:
+ """Add lines to the info table, pertaining to non-empty series."""
+ self.add_object_type_line()
+ self.add_index_range_line()
+ self.add_dtypes_line()
+ if self.display_memory_usage:
+ self.add_memory_usage_line()
+
+
+class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
+ """
+ Series info table builder for verbose output.
+ """
+
+ def __init__(
+ self,
+ *,
+ info: SeriesInfo,
+ with_counts: bool,
+ ):
+ self.info = info
+ self.with_counts = with_counts
+ self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
+ self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()
+
+ def _fill_non_empty_info(self) -> None:
+ """Add lines to the info table, pertaining to non-empty series."""
+ self.add_object_type_line()
+ self.add_index_range_line()
+ self.add_series_name_line()
+ self.add_header_line()
+ self.add_separator_line()
+ self.add_body_lines()
+ self.add_dtypes_line()
+ if self.display_memory_usage:
+ self.add_memory_usage_line()
+
+ def add_series_name_line(self) -> None:
+ self._lines.append(f"Series name: {self.data.name}")
+
+ @property
+ def headers(self) -> Sequence[str]:
+ """Headers names of the columns in verbose table."""
+ if self.with_counts:
+ return ["Non-Null Count", "Dtype"]
+ return ["Dtype"]
+
+ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
+ """Iterator with string representation of body data without counts."""
+ yield from self._gen_dtypes()
+
+ def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
+ """Iterator with string representation of body data with counts."""
+ yield from zip(
+ self._gen_non_null_counts(),
+ self._gen_dtypes(),
+ )
+
+
def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
"""
Create mapping between datatypes and their number of occurrences.
diff --git a/pandas/tests/io/formats/test_series_info.py b/pandas/tests/io/formats/test_series_info.py
new file mode 100644
index 0000000000000..1f755d675d078
--- /dev/null
+++ b/pandas/tests/io/formats/test_series_info.py
@@ -0,0 +1,183 @@
+from io import StringIO
+from string import ascii_uppercase as uppercase
+import textwrap
+
+import numpy as np
+import pytest
+
+from pandas.compat import PYPY
+
+from pandas import (
+ CategoricalIndex,
+ MultiIndex,
+ Series,
+ date_range,
+)
+
+
+def test_info_categorical_column_just_works():
+ n = 2500
+ data = np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
+ s = Series(data).astype("category")
+ s.isna()
+ buf = StringIO()
+ s.info(buf=buf)
+
+ s2 = s[s == "d"]
+ buf = StringIO()
+ s2.info(buf=buf)
+
+
+def test_info_categorical():
+ # GH14298
+ idx = CategoricalIndex(["a", "b"])
+ s = Series(np.zeros(2), index=idx)
+ buf = StringIO()
+ s.info(buf=buf)
+
+
+@pytest.mark.parametrize("verbose", [True, False])
+def test_info_series(verbose):
+ index = MultiIndex(
+ levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
+ codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
+ names=["first", "second"],
+ )
+ s = Series(range(len(index)), index=index, name="sth")
+ buf = StringIO()
+ s.info(verbose=verbose, buf=buf)
+ result = buf.getvalue()
+
+ expected = textwrap.dedent(
+ """\
+ <class 'pandas.core.series.Series'>
+ MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
+ """
+ )
+ if verbose:
+ expected += textwrap.dedent(
+ """\
+ Series name: sth
+ Non-Null Count Dtype
+ -------------- -----
+ 10 non-null int64
+ """
+ )
+ expected += textwrap.dedent(
+ f"""\
+ dtypes: int64(1)
+ memory usage: {s.memory_usage()}.0+ bytes
+ """
+ )
+ assert result == expected
+
+
+def test_info_memory():
+ s = Series([1, 2], dtype="i8")
+ buf = StringIO()
+ s.info(buf=buf)
+ result = buf.getvalue()
+ memory_bytes = float(s.memory_usage())
+ expected = textwrap.dedent(
+ f"""\
+ <class 'pandas.core.series.Series'>
+ RangeIndex: 2 entries, 0 to 1
+ Series name: None
+ Non-Null Count Dtype
+ -------------- -----
+ 2 non-null int64
+ dtypes: int64(1)
+ memory usage: {memory_bytes} bytes
+ """
+ )
+ assert result == expected
+
+
+def test_info_wide():
+ s = Series(np.random.randn(101))
+ msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
+ with pytest.raises(ValueError, match=msg):
+ s.info(max_cols=1)
+
+
+def test_info_shows_dtypes():
+ dtypes = [
+ "int64",
+ "float64",
+ "datetime64[ns]",
+ "timedelta64[ns]",
+ "complex128",
+ "object",
+ "bool",
+ ]
+ n = 10
+ for dtype in dtypes:
+ s = Series(np.random.randint(2, size=n).astype(dtype))
+ buf = StringIO()
+ s.info(buf=buf)
+ res = buf.getvalue()
+ name = f"{n:d} non-null {dtype}"
+ assert name in res
+
+
+@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
+def test_info_memory_usage_deep_not_pypy():
+ s_with_object_index = Series({"a": [1]}, index=["foo"])
+ assert s_with_object_index.memory_usage(
+ index=True, deep=True
+ ) > s_with_object_index.memory_usage(index=True)
+
+ s_object = Series({"a": ["a"]})
+ assert s_object.memory_usage(deep=True) > s_object.memory_usage()
+
+
+@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
+def test_info_memory_usage_deep_pypy():
+ s_with_object_index = Series({"a": [1]}, index=["foo"])
+ assert s_with_object_index.memory_usage(
+ index=True, deep=True
+ ) == s_with_object_index.memory_usage(index=True)
+
+ s_object = Series({"a": ["a"]})
+ assert s_object.memory_usage(deep=True) == s_object.memory_usage()
+
+
+@pytest.mark.parametrize(
+ "series, plus",
+ [
+ (Series(1, index=[1, 2, 3]), False),
+ (Series(1, index=list("ABC")), True),
+ (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
+ (
+ Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
+ True,
+ ),
+ ],
+)
+def test_info_memory_usage_qualified(series, plus):
+ buf = StringIO()
+ series.info(buf=buf)
+ if plus:
+ assert "+" in buf.getvalue()
+ else:
+ assert "+" not in buf.getvalue()
+
+
+def test_info_memory_usage_bug_on_multiindex():
+ # GH 14308
+ # memory usage introspection should not materialize .values
+ N = 100
+ M = len(uppercase)
+ index = MultiIndex.from_product(
+ [list(uppercase), date_range("20160101", periods=N)],
+ names=["id", "date"],
+ )
+ s = Series(np.random.randn(N * M), index=index)
+
+ unstacked = s.unstack("id")
+ assert s.values.nbytes == unstacked.values.nbytes
+ assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
+
+ # high upper bound
+ diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
+ assert diff < 2000
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index 4e4eb89328540..a01b8d304d05d 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -110,12 +110,6 @@ def test_not_hashable(self):
def test_contains(self, datetime_series):
tm.assert_contains_all(datetime_series.index, datetime_series)
- def test_raise_on_info(self):
- s = Series(np.random.randn(10))
- msg = "'Series' object has no attribute 'info'"
- with pytest.raises(AttributeError, match=msg):
- s.info()
-
def test_axis_alias(self):
s = Series([1, 2, np.nan])
tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))