From 1d6aa0e8dc9f4455d1a6edd5c1be43ad7f13cc80 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 26 Dec 2016 20:43:18 -0800 Subject: [PATCH 1/9] DOC: Clarified and expanded describe documentation --- pandas/core/generic.py | 230 +++++++++++++++++++++++++++++++++-------- 1 file changed, 189 insertions(+), 41 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 77c2699f5a432..adcb73acff2fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5201,60 +5201,208 @@ def abs(self): """ return np.abs(self) - _shared_docs['describe'] = """ - Generate various summary statistics, excluding NaN values. + def describe(self, percentiles=None, include=None, exclude=None): + """ + Generates descriptive statistics that summarize the central tendency, + dispersion and shape of a dataset's distribution, excluding ``NaN`` values. - Parameters - ---------- - percentiles : array-like, optional - The percentiles to include in the output. Should all - be in the interval [0, 1]. By default `percentiles` is - [.25, .5, .75], returning the 25th, 50th, and 75th percentiles. - include, exclude : list-like, 'all', or None (default) - Specify the form of the returned result. Either: - - - None to both (default). The result will include only - numeric-typed columns or, if none are, only categorical columns. - - A list of dtypes or strings to be included/excluded. - To select all numeric types use numpy numpy.number. To select - categorical objects use type object. See also the select_dtypes - documentation. eg. df.describe(include=['O']) - - If include is the string 'all', the output column-set will - match the input one. + Analyzes both ``numeric`` and ``object`` series, as well + as `DataFrame` column sets of mixed data types. - Returns - ------- - summary: %(klass)s of summary statistics + For ``numeric`` data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. 
By default the lower percentile is `25` and the + upper percentile is ``75``. The `50` percentile is typically the + same as the median. - Notes - ----- - The output DataFrame index depends on the requested dtypes: + For ``object`` data (e.g. strings or timestamps), the result's index + will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` + is the most common value. The ``freq`` is the most common value's + frequency. Timestamps also include the ``first`` and ``last`` items. - For numeric dtypes, it will include: count, mean, std, min, - max, and lower, 50, and upper percentiles. + If multiple ``object`` values have the highest count, then the + ``count`` and ``top`` results will be arbitrarily chosen from + among those with the highest count. - For object dtypes (e.g. timestamps or strings), the index - will include the count, unique, most common, and frequency of the - most common. Timestamps also include the first and last items. + For mixed data types provided via a `DataFrame`, the result will + include a union of attributes of each type. - For mixed dtypes, the index will be the union of the corresponding - output types. Non-applicable entries will be filled with NaN. - Note that mixed-dtype outputs can only be returned from mixed-dtype - inputs and appropriate use of the include/exclude arguments. + The `include` and `exclude` parameters can be used to limit + which columns in a `DataFrame` are analyzed for the output. + The parameters are ignored when analyzing a `Series`. - If multiple values have the highest count, then the - `count` and `most common` pair will be arbitrarily chosen from - among those with the highest count. + Parameters + ---------- + percentiles : list of numbers, optional + The percentiles to include in the output. All should + fall between 0 and 1. The default is + ``[.25, .5, .75]``, which returns the 25th, 50th, and + 75th percentiles. 
+ include : None (default), 'all', or list of dtypes or strings, optional + A white list of data types to include in the result. Ignored + for `Series`. Here are the options: + + - None (default). The result will include all ``numeric`` columns. + - 'all'. All columns on the input will be included in the output. + - A list of dtypes or strings. Limits the results to the + provided data types. + To limit the result to numeric types submit + ``numpy.number``. To limit it instead to categorical + objects submit the data type ``object``. Strings + can also be used in the style of + `select_dtypes` (e.g. df.describe(include=['O'])) + exclude : None (default) or a list of dtypes or strings, optional, + A black list of data types to omit from the result. Ignored + for `Series`. Here are the options: + + - None (default). The result will exclude nothing. + - A list of dtypes or strings. Excludes the provided data types + from the result. To select numeric types submit + ``numpy.number``. To select categorical objects submut the data + type ``object``. Strings can also be used in the style of + `select_dtypes` (e.g. df.describe(include=['O'])) + + Returns + ------- + summary: NDFrame of summary statistics + + Examples + -------- + Describing a numeric `Series`. - The include, exclude arguments are ignored for Series. + >>> import pandas as pd + >>> s = pd.Series([1, 2, 3]) + >>> s.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Describing a categorical `Series`. + + >>> s = pd.Series(['a', 'a', 'b', 'c']) + >>> s.describe() + count 4 + unique 3 + top a + freq 2 + dtype: object + + Describing a timestamp `Series`. + + >>> import numpy as np + >>> s = pd.Series([ + .. np.datetime64("2000-01-01"), + .. np.datetime64("2010-01-01"), + .. np.datetime64("2010-01-01") + .. 
]) + >>> s.describe() + count 3 + unique 2 + top 2010-01-01 00:00:00 + freq 2 + first 2000-01-01 00:00:00 + last 2010-01-01 00:00:00 + dtype: object + + Describing a `DataFrame`. By default only numeric fields are returned. + + >>> df = pd.DataFrame( + .. [[1, 'a'], [2, 'b'], [3, 'c']], + .. columns=['numeric', 'object'] + .. ) + >>> df.describe() + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Describing all columns of a `DataFrame` regardless of data type. + + >>> df.describe(include='all') + numeric object + count 3.0 3 + unique NaN 3 + top NaN b + freq NaN 1 + mean 2.0 NaN + std 1.0 NaN + min 1.0 NaN + 25% 1.5 NaN + 50% 2.0 NaN + 75% 2.5 NaN + max 3.0 NaN + + Describing a column from a `DataFrame` by accessing it as an attribute. + + >>> df.numeric.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + Name: numeric, dtype: float64 + + Including only ``numeric`` columns in a `DataFrame` description. + + >>> df.describe(include=[np.number]) + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Including only ``string`` columns in a `DataFrame` description. + + >>> df.describe(include=[np.object]) + object + count 3 + unique 3 + top b + freq 1 + + Excluding ``numeric`` columns from a `DataFrame` description. + + >>> df.describe(exclude=[np.number]) + object + count 3 + unique 3 + top b + freq 1 + + Excluding ``object`` columns from a `DataFrame` description. + + >>> df.describe(exclude=[np.object]) + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 See Also -------- DataFrame.select_dtypes """ - - @Appender(_shared_docs['describe'] % _shared_doc_kwargs) - def describe(self, percentiles=None, include=None, exclude=None): if self.ndim >= 3: msg = "describe is not implemented on Panel or PanelND objects." 
raise NotImplementedError(msg) From a445d2af38b7babbc21294a1218dba7fbf31a211 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 26 Dec 2016 20:45:29 -0800 Subject: [PATCH 2/9] PEP8 fix --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index adcb73acff2fe..84d8f4acf53a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5204,7 +5204,8 @@ def abs(self): def describe(self, percentiles=None, include=None, exclude=None): """ Generates descriptive statistics that summarize the central tendency, - dispersion and shape of a dataset's distribution, excluding ``NaN`` values. + dispersion and shape of a dataset's distribution, excluding + ``NaN`` values. Analyzes both ``numeric`` and ``object`` series, as well as `DataFrame` column sets of mixed data types. From 55cf4ec31a19b0a5a6cde94fef8a2d6762e929fb Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 26 Dec 2016 21:09:16 -0800 Subject: [PATCH 3/9] Slight change for consistency --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 84d8f4acf53a7..6000b17591b59 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5212,8 +5212,8 @@ def describe(self, percentiles=None, include=None, exclude=None): For ``numeric`` data, the result's index will include ``count``, ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and - upper percentiles. By default the lower percentile is `25` and the - upper percentile is ``75``. The `50` percentile is typically the + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is typically the same as the median. For ``object`` data (e.g. 
strings or timestamps), the result's index From 0161a57c7f00c7b408c4ff12345b82fc5513b26d Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 27 Dec 2016 10:47:04 -0800 Subject: [PATCH 4/9] Removed a bunch of tick marks and moved the extended description down to the Notes section --- pandas/core/generic.py | 88 +++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6000b17591b59..ad2de0ef48ea8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5207,31 +5207,6 @@ def describe(self, percentiles=None, include=None, exclude=None): dispersion and shape of a dataset's distribution, excluding ``NaN`` values. - Analyzes both ``numeric`` and ``object`` series, as well - as `DataFrame` column sets of mixed data types. - - For ``numeric`` data, the result's index will include ``count``, - ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and - upper percentiles. By default the lower percentile is ``25`` and the - upper percentile is ``75``. The ``50`` percentile is typically the - same as the median. - - For ``object`` data (e.g. strings or timestamps), the result's index - will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` - is the most common value. The ``freq`` is the most common value's - frequency. Timestamps also include the ``first`` and ``last`` items. - - If multiple ``object`` values have the highest count, then the - ``count`` and ``top`` results will be arbitrarily chosen from - among those with the highest count. - - For mixed data types provided via a `DataFrame`, the result will - include a union of attributes of each type. - - The `include` and `exclude` parameters can be used to limit - which columns in a `DataFrame` are analyzed for the output. - The parameters are ignored when analyzing a `Series`. 
- Parameters ---------- percentiles : list of numbers, optional @@ -5243,33 +5218,61 @@ def describe(self, percentiles=None, include=None, exclude=None): A white list of data types to include in the result. Ignored for `Series`. Here are the options: - - None (default). The result will include all ``numeric`` columns. + - None (default). The result will include all numeric columns. - 'all'. All columns on the input will be included in the output. - A list of dtypes or strings. Limits the results to the provided data types. To limit the result to numeric types submit ``numpy.number``. To limit it instead to categorical - objects submit the data type ``object``. Strings + objects submit the object data type. Strings can also be used in the style of - `select_dtypes` (e.g. df.describe(include=['O'])) + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) exclude : None (default) or a list of dtypes or strings, optional, A black list of data types to omit from the result. Ignored - for `Series`. Here are the options: + for Series. Here are the options: - None (default). The result will exclude nothing. - A list of dtypes or strings. Excludes the provided data types from the result. To select numeric types submit ``numpy.number``. To select categorical objects submut the data type ``object``. Strings can also be used in the style of - `select_dtypes` (e.g. df.describe(include=['O'])) + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) Returns ------- summary: NDFrame of summary statistics + Notes + ----- + + Analyzes both numeric and object series, as well + as DataFrame column sets of mixed data types. + + For numeric data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is typically the + same as the median. + + For object data (e.g. 
strings or timestamps), the result's index + will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` + is the most common value. The ``freq`` is the most common value's + frequency. Timestamps also include the ``first`` and ``last`` items. + + If multiple object values have the highest count, then the + ``count`` and ``top`` results will be arbitrarily chosen from + among those with the highest count. + + For mixed data types provided via a DataFrame, the result will + include a union of attributes of each type. + + The `include` and `exclude` parameters can be used to limit + which columns in a DataFrame are analyzed for the output. + The parameters are ignored when analyzing a Series. + Examples -------- - Describing a numeric `Series`. + Describing a numeric Series. >>> import pandas as pd >>> s = pd.Series([1, 2, 3]) @@ -5283,7 +5286,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 max 3.0 - Describing a categorical `Series`. + Describing a categorical Series. >>> s = pd.Series(['a', 'a', 'b', 'c']) >>> s.describe() @@ -5293,7 +5296,7 @@ def describe(self, percentiles=None, include=None, exclude=None): freq 2 dtype: object - Describing a timestamp `Series`. + Describing a timestamp Series. >>> import numpy as np >>> s = pd.Series([ @@ -5310,7 +5313,7 @@ def describe(self, percentiles=None, include=None, exclude=None): last 2010-01-01 00:00:00 dtype: object - Describing a `DataFrame`. By default only numeric fields are returned. + Describing a DataFrame. By default only numeric fields are returned. >>> df = pd.DataFrame( .. [[1, 'a'], [2, 'b'], [3, 'c']], @@ -5327,7 +5330,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 max 3.0 - Describing all columns of a `DataFrame` regardless of data type. + Describing all columns of a DataFrame regardless of data type. 
>>> df.describe(include='all') numeric object @@ -5343,7 +5346,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 NaN max 3.0 NaN - Describing a column from a `DataFrame` by accessing it as an attribute. + Describing a column from a DataFrame by accessing it as an attribute. >>> df.numeric.describe() count 3.0 @@ -5356,7 +5359,7 @@ def describe(self, percentiles=None, include=None, exclude=None): max 3.0 Name: numeric, dtype: float64 - Including only ``numeric`` columns in a `DataFrame` description. + Including only numeric columns in a DataFrame description. >>> df.describe(include=[np.number]) numeric @@ -5369,7 +5372,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 max 3.0 - Including only ``string`` columns in a `DataFrame` description. + Including only string columns in a DataFrame description. >>> df.describe(include=[np.object]) object @@ -5378,7 +5381,7 @@ def describe(self, percentiles=None, include=None, exclude=None): top b freq 1 - Excluding ``numeric`` columns from a `DataFrame` description. + Excluding numeric columns from a DataFrame description. >>> df.describe(exclude=[np.number]) object @@ -5387,7 +5390,7 @@ def describe(self, percentiles=None, include=None, exclude=None): top b freq 1 - Excluding ``object`` columns from a `DataFrame` description. + Excluding object columns from a DataFrame description. 
>>> df.describe(exclude=[np.object]) numeric @@ -5402,6 +5405,11 @@ def describe(self, percentiles=None, include=None, exclude=None): See Also -------- + DataFrame.count + DataFrame.max + DataFrame.min + DataFrame.mean + DataFrame.std DataFrame.select_dtypes """ if self.ndim >= 3: From 38015cee6d681f0391926c3ede830f23bdfe1f55 Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 27 Dec 2016 10:49:32 -0800 Subject: [PATCH 5/9] Typo fix --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ad2de0ef48ea8..71a54422c3fb5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5224,7 +5224,7 @@ def describe(self, percentiles=None, include=None, exclude=None): provided data types. To limit the result to numeric types submit ``numpy.number``. To limit it instead to categorical - objects submit the object data type. Strings + objects submit the ``numpy.object`` data type. Strings can also be used in the style of ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) exclude : None (default) or a list of dtypes or strings, optional, @@ -5234,8 +5234,8 @@ def describe(self, percentiles=None, include=None, exclude=None): - None (default). The result will exclude nothing. - A list of dtypes or strings. Excludes the provided data types from the result. To select numeric types submit - ``numpy.number``. To select categorical objects submut the data - type ``object``. Strings can also be used in the style of + ``numpy.number``. To select categorical objects submit the data + type ``numpy.object``. Strings can also be used in the style of ``select_dtypes`` (e.g. 
``df.describe(include=['O'])``) Returns From 86dd44a7afd2c5a389502eb77be31bdf7ce4ceb8 Mon Sep 17 00:00:00 2001 From: palewire Date: Wed, 28 Dec 2016 08:30:56 -0800 Subject: [PATCH 6/9] Further improvements to describe documentation --- pandas/core/generic.py | 62 ++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 71a54422c3fb5..ddf3490c5a295 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5207,51 +5207,53 @@ def describe(self, percentiles=None, include=None, exclude=None): dispersion and shape of a dataset's distribution, excluding ``NaN`` values. + Analyzes both numeric and object series, as well + as ``DataFrame`` column sets of mixed data types. The output + will vary depending on what is provided. Refer to the notes + below for more detail. + Parameters ---------- - percentiles : list of numbers, optional + percentiles : list-like of numbers, optional The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and 75th percentiles. - include : None (default), 'all', or list of dtypes or strings, optional + include : 'all' , list-like of dtypes or strings, or None (default), optional A white list of data types to include in the result. Ignored - for `Series`. Here are the options: + for ``Series``. Here are the options: - - None (default). The result will include all numeric columns. - - 'all'. All columns on the input will be included in the output. - - A list of dtypes or strings. Limits the results to the + - 'all' : All columns of the input will be included in the output. + - A list-like of dtypes or strings : Limits the results to the provided data types. To limit the result to numeric types submit ``numpy.number``. To limit it instead to categorical objects submit the ``numpy.object`` data type. 
Strings can also be used in the style of ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) - exclude : None (default) or a list of dtypes or strings, optional, + - None (default) : The result will include all numeric columns. + exclude : list-like of dtypes or strings, or None (default), optional, A black list of data types to omit from the result. Ignored - for Series. Here are the options: + for ``Series``. Here are the options: - - None (default). The result will exclude nothing. - - A list of dtypes or strings. Excludes the provided data types + - A list-like of dtypes or strings : Excludes the provided data types from the result. To select numeric types submit ``numpy.number``. To select categorical objects submit the data type ``numpy.object``. Strings can also be used in the style of ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) + - None (default) : The result will exclude nothing. Returns ------- - summary: NDFrame of summary statistics + summary: Series/DataFrame of summary statistics Notes ----- - Analyzes both numeric and object series, as well - as DataFrame column sets of mixed data types. - For numeric data, the result's index will include ``count``, ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and upper percentiles. By default the lower percentile is ``25`` and the - upper percentile is ``75``. The ``50`` percentile is typically the + upper percentile is ``75``. The ``50`` percentile is the same as the median. For object data (e.g. strings or timestamps), the result's index @@ -5263,16 +5265,18 @@ def describe(self, percentiles=None, include=None, exclude=None): ``count`` and ``top`` results will be arbitrarily chosen from among those with the highest count. - For mixed data types provided via a DataFrame, the result will - include a union of attributes of each type. + For mixed data types provided via a ``DataFrame``, the default is to + return only an analysis of numeric columns. 
If ``include='all'`` + is provided as an option, the result will include a union of + attributes of each type. The `include` and `exclude` parameters can be used to limit - which columns in a DataFrame are analyzed for the output. - The parameters are ignored when analyzing a Series. + which columns in a ``DataFrame`` are analyzed for the output. + The parameters are ignored when analyzing a ``Series``. Examples -------- - Describing a numeric Series. + Describing a numeric ``Series``. >>> import pandas as pd >>> s = pd.Series([1, 2, 3]) @@ -5286,7 +5290,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 max 3.0 - Describing a categorical Series. + Describing a categorical ``Series``. >>> s = pd.Series(['a', 'a', 'b', 'c']) >>> s.describe() @@ -5296,7 +5300,7 @@ def describe(self, percentiles=None, include=None, exclude=None): freq 2 dtype: object - Describing a timestamp Series. + Describing a timestamp ``Series``. >>> import numpy as np >>> s = pd.Series([ @@ -5313,7 +5317,7 @@ def describe(self, percentiles=None, include=None, exclude=None): last 2010-01-01 00:00:00 dtype: object - Describing a DataFrame. By default only numeric fields are returned. + Describing a ``DataFrame``. By default only numeric fields are returned. >>> df = pd.DataFrame( .. [[1, 'a'], [2, 'b'], [3, 'c']], @@ -5330,7 +5334,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 max 3.0 - Describing all columns of a DataFrame regardless of data type. + Describing all columns of a ``DataFrame`` regardless of data type. >>> df.describe(include='all') numeric object @@ -5346,7 +5350,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 NaN max 3.0 NaN - Describing a column from a DataFrame by accessing it as an attribute. + Describing a column from a ``DataFrame`` by accessing it as an attribute. 
>>> df.numeric.describe() count 3.0 @@ -5359,7 +5363,7 @@ def describe(self, percentiles=None, include=None, exclude=None): max 3.0 Name: numeric, dtype: float64 - Including only numeric columns in a DataFrame description. + Including only numeric columns in a ``DataFrame`` description. >>> df.describe(include=[np.number]) numeric @@ -5372,7 +5376,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 max 3.0 - Including only string columns in a DataFrame description. + Including only string columns in a ``DataFrame`` description. >>> df.describe(include=[np.object]) object @@ -5381,7 +5385,7 @@ def describe(self, percentiles=None, include=None, exclude=None): top b freq 1 - Excluding numeric columns from a DataFrame description. + Excluding numeric columns from a ``DataFrame`` description. >>> df.describe(exclude=[np.number]) object @@ -5390,7 +5394,7 @@ def describe(self, percentiles=None, include=None, exclude=None): top b freq 1 - Excluding object columns from a DataFrame description. + Excluding object columns from a ``DataFrame`` description. >>> df.describe(exclude=[np.object]) numeric From 8880a89889e16ced7c0b6201f40981b7e8b51b68 Mon Sep 17 00:00:00 2001 From: palewire Date: Sat, 31 Dec 2016 10:58:02 -0800 Subject: [PATCH 7/9] PEP8 fixes --- pandas/core/generic.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ddf3490c5a295..935b24c11ae87 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5219,12 +5219,12 @@ def describe(self, percentiles=None, include=None, exclude=None): fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and 75th percentiles. - include : 'all' , list-like of dtypes or strings, or None (default), optional + include : 'all', list-like of dtypes or None (default), optional A white list of data types to include in the result. Ignored for ``Series``. 
Here are the options: - 'all' : All columns of the input will be included in the output. - - A list-like of dtypes or strings : Limits the results to the + - A list-like of dtypes : Limits the results to the provided data types. To limit the result to numeric types submit ``numpy.number``. To limit it instead to categorical @@ -5232,11 +5232,11 @@ def describe(self, percentiles=None, include=None, exclude=None): can also be used in the style of ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) - None (default) : The result will include all numeric columns. - exclude : list-like of dtypes or strings, or None (default), optional, + exclude : list-like of dtypes or None (default), optional A black list of data types to omit from the result. Ignored for ``Series``. Here are the options: - - A list-like of dtypes or strings : Excludes the provided data types + - A list-like of dtypes : Excludes the provided data types from the result. To select numeric types submit ``numpy.number``. To select categorical objects submit the data type ``numpy.object``. Strings can also be used in the style of @@ -5317,7 +5317,8 @@ def describe(self, percentiles=None, include=None, exclude=None): last 2010-01-01 00:00:00 dtype: object - Describing a ``DataFrame``. By default only numeric fields are returned. + Describing a ``DataFrame``. By default only numeric fields + are returned. >>> df = pd.DataFrame( .. [[1, 'a'], [2, 'b'], [3, 'c']], @@ -5350,7 +5351,8 @@ def describe(self, percentiles=None, include=None, exclude=None): 75% 2.5 NaN max 3.0 NaN - Describing a column from a ``DataFrame`` by accessing it as an attribute. + Describing a column from a ``DataFrame`` by accessing it as + an attribute. 
>>> df.numeric.describe() count 3.0 From dff88bb123232bb3e4d25ae3ad16e0dc2cbd2be5 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Sat, 31 Dec 2016 17:55:44 -0800 Subject: [PATCH 8/9] Fixed whitespace --- pandas/core/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 935b24c11ae87..c313fc9069072 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5249,7 +5249,6 @@ def describe(self, percentiles=None, include=None, exclude=None): Notes ----- - For numeric data, the result's index will include ``count``, ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and upper percentiles. By default the lower percentile is ``25`` and the From d97df49f4bce2cce600b8b73207543cbff540f93 Mon Sep 17 00:00:00 2001 From: palewire Date: Sat, 31 Dec 2016 18:22:05 -0800 Subject: [PATCH 9/9] A third dot --- pandas/core/generic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 935b24c11ae87..674210ea1553d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5304,10 +5304,10 @@ def describe(self, percentiles=None, include=None, exclude=None): >>> import numpy as np >>> s = pd.Series([ - .. np.datetime64("2000-01-01"), - .. np.datetime64("2010-01-01"), - .. np.datetime64("2010-01-01") - .. ]) + ... np.datetime64("2000-01-01"), + ... np.datetime64("2010-01-01"), + ... np.datetime64("2010-01-01") + ... ]) >>> s.describe() count 3 unique 2 @@ -5321,9 +5321,9 @@ def describe(self, percentiles=None, include=None, exclude=None): are returned. >>> df = pd.DataFrame( - .. [[1, 'a'], [2, 'b'], [3, 'c']], - .. columns=['numeric', 'object'] - .. ) + ... [[1, 'a'], [2, 'b'], [3, 'c']], + ... columns=['numeric', 'object'] + ... ) >>> df.describe() numeric count 3.0