pandas-dev · jreback · Jan 13, 2021 · Jan 12, 2021 · Jan 12, 2021 · jreback
diff --git a/pandas/core/describe.py b/pandas/core/describe.py
@@ -83,98 +83,15 @@ def describe_ndframe(
         raise ValueError("percentiles cannot contain duplicates")
     percentiles = unique_pcts
 
-    formatted_percentiles = format_percentiles(percentiles)
-
-    def describe_numeric_1d(series) -> "Series":
-        from pandas import Series
-
-        stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
-        d = (
-            [series.count(), series.mean(), series.std(), series.min()]
-            + series.quantile(percentiles).tolist()
-            + [series.max()]
-        )
-        return Series(d, index=stat_index, name=series.name)
-
-    def describe_categorical_1d(data) -> "Series":
-        names = ["count", "unique"]
-        objcounts = data.value_counts()
-        count_unique = len(objcounts[objcounts != 0])
-        result = [data.count(), count_unique]
-        dtype = None
-        if result[1] > 0:
-            top, freq = objcounts.index[0], objcounts.iloc[0]
-            if is_datetime64_any_dtype(data.dtype):
-                if obj.ndim == 1:
-                    stacklevel = 5
-                else:
-                    stacklevel = 6
-                warnings.warn(
-                    "Treating datetime data as categorical rather than numeric in "
-                    "`.describe` is deprecated and will be removed in a future "
-                    "version of pandas. Specify `datetime_is_numeric=True` to "
-                    "silence this warning and adopt the future behavior now.",
-                    FutureWarning,
-                    stacklevel=stacklevel,
-                )
-                tz = data.dt.tz
-                asint = data.dropna().values.view("i8")
-                top = Timestamp(top)
-                if top.tzinfo is not None and tz is not None:
-                    # Don't tz_localize(None) if key is already tz-aware
-                    top = top.tz_convert(tz)
-                else:
-                    top = top.tz_localize(tz)
-                names += ["top", "freq", "first", "last"]
-                result += [
-                    top,
-                    freq,
-                    Timestamp(asint.min(), tz=tz),
-                    Timestamp(asint.max(), tz=tz),
-                ]
-            else:
-                names += ["top", "freq"]
-                result += [top, freq]
-
-        # If the DataFrame is empty, set 'top' and 'freq' to None
-        # to maintain output shape consistency
-        else:
-            names += ["top", "freq"]
-            result += [np.nan, np.nan]
-            dtype = "object"
-
-        from pandas import Series
-
-        return Series(result, index=names, name=data.name, dtype=dtype)
-
-    def describe_timestamp_1d(data) -> "Series":
-        # GH-30164
-        from pandas import Series
-
-        stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
-        d = (
-            [data.count(), data.mean(), data.min()]
-            + data.quantile(percentiles).tolist()
-            + [data.max()]
-        )
-        return Series(d, index=stat_index, name=data.name)
-
-    def describe_1d(data) -> "Series":
-        if is_bool_dtype(data.dtype):
-            return describe_categorical_1d(data)
-        elif is_numeric_dtype(data):
-            return describe_numeric_1d(data)
-        elif is_datetime64_any_dtype(data.dtype) and datetime_is_numeric:
-            return describe_timestamp_1d(data)
-        elif is_timedelta64_dtype(data.dtype):
-            return describe_numeric_1d(data)
-        else:
-            return describe_categorical_1d(data)
-
     if obj.ndim == 1:
         # Incompatible return value type
         #  (got "Series", expected "FrameOrSeries")  [return-value]
-        return describe_1d(obj)  # type:ignore[return-value]
+        return describe_1d(
+            obj,
+            percentiles,
+            datetime_is_numeric,
+            is_series=True,
+        )  # type:ignore[return-value]
     elif (include is None) and (exclude is None):
         # when some numerics are found, keep only numerics
         default_include = [np.number]
@@ -191,7 +108,10 @@ def describe_1d(data) -> "Series":
     else:
         data = obj.select_dtypes(include=include, exclude=exclude)
 
-    ldesc = [describe_1d(s) for _, s in data.items()]
+    ldesc = [
+        describe_1d(s, percentiles, datetime_is_numeric, is_series=False)
+        for _, s in data.items()
+    ]
     # set a convenient order for rows
     names: List[Hashable] = []
     ldesc_indexes = sorted((x.index for x in ldesc), key=len)
@@ -203,3 +123,143 @@ def describe_1d(data) -> "Series":
     d = concat([x.reindex(names, copy=False) for x in ldesc], axis=1, sort=False)
     d.columns = data.columns.copy()
     return d
+
+
+def describe_numeric_1d(series, percentiles) -> "Series":
+    """Describe series containing numerical data.
+
+    Parameters
+    ----------
+    series : Series
+        Series to be described.
+    percentiles : list-like of numbers, optional
+        The percentiles to include in the output.
+    """
+    from pandas import Series
+
+    formatted_percentiles = format_percentiles(percentiles)
+
+    stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
+    d = (
+        [series.count(), series.mean(), series.std(), series.min()]
+        + series.quantile(percentiles).tolist()
+        + [series.max()]
+    )
+    return Series(d, index=stat_index, name=series.name)
+
+
+def describe_categorical_1d(data, is_series) -> "Series":
+    """Describe series containing categorical data.
+
+    Parameters
+    ----------
+    data : Series
+        Series to be described.
+    is_series : bool
+        True if the original object is a Series.
+        False if the one column of the DataFrame is described.
+    """
+    names = ["count", "unique"]
+    objcounts = data.value_counts()
+    count_unique = len(objcounts[objcounts != 0])
+    result = [data.count(), count_unique]
+    dtype = None
+    if result[1] > 0:
+        top, freq = objcounts.index[0], objcounts.iloc[0]
+        if is_datetime64_any_dtype(data.dtype):
+            if is_series:
+                stacklevel = 5
+            else:
+                stacklevel = 6
+            warnings.warn(
+                "Treating datetime data as categorical rather than numeric in "
+                "`.describe` is deprecated and will be removed in a future "
+                "version of pandas. Specify `datetime_is_numeric=True` to "
+                "silence this warning and adopt the future behavior now.",
+                FutureWarning,
+                stacklevel=stacklevel,
+            )
+            tz = data.dt.tz
+            asint = data.dropna().values.view("i8")
+            top = Timestamp(top)
+            if top.tzinfo is not None and tz is not None:
+                # Don't tz_localize(None) if key is already tz-aware
+                top = top.tz_convert(tz)
+            else:
+                top = top.tz_localize(tz)
+            names += ["top", "freq", "first", "last"]
+            result += [
+                top,
+                freq,
+                Timestamp(asint.min(), tz=tz),
+                Timestamp(asint.max(), tz=tz),
+            ]
+        else:
+            names += ["top", "freq"]
+            result += [top, freq]
+
+    # If the DataFrame is empty, set 'top' and 'freq' to None
+    # to maintain output shape consistency
+    else:
+        names += ["top", "freq"]
+        result += [np.nan, np.nan]
+        dtype = "object"
+
+    from pandas import Series
+
+    return Series(result, index=names, name=data.name, dtype=dtype)
+
+
+def describe_timestamp_1d(data, percentiles) -> "Series":
+    """Describe series containing datetime64 dtype.
+
+    Parameters
+    ----------
+    data : Series
+        Series to be described.
+    percentiles : list-like of numbers, optional
+        The percentiles to include in the output.
+    """
+    # GH-30164
+    from pandas import Series
+
+    formatted_percentiles = format_percentiles(percentiles)
+
+    stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
+    d = (
+        [data.count(), data.mean(), data.min()]
+        + data.quantile(percentiles).tolist()
+        + [data.max()]
+    )
+    return Series(d, index=stat_index, name=data.name)
+
+
+def describe_1d(data, percentiles, datetime_is_numeric, *, is_series) -> "Series":
+    """Describe series.
+
+    Parameters
+    ----------
+    data : Series
+        Series to be described.
+    percentiles : list-like of numbers, optional
+        The percentiles to include in the output.
+    datetime_is_numeric : bool, default False
+        Whether to treat datetime dtypes as numeric.
+    is_series : bool
+        True if the original object is a Series.
+        False if the one column of the DataFrame is described.
+
+    Returns
+    -------
+    Series
+    """
+    if is_bool_dtype(data.dtype):
+        return describe_categorical_1d(data, is_series)
+    elif is_numeric_dtype(data):
+        return describe_numeric_1d(data, percentiles)
+    elif is_datetime64_any_dtype(data.dtype) and datetime_is_numeric:
+        return describe_timestamp_1d(data, percentiles)
+    elif is_timedelta64_dtype(data.dtype):
+        return describe_numeric_1d(data, percentiles)
+    else:
+        return describe_categorical_1d(data, is_series)