@@ -6362,20 +6362,22 @@ def describe(self, percentiles=None, include=None, exclude=None):
6362
6362
- A list-like of dtypes : Limits the results to the
6363
6363
provided data types.
6364
6364
To limit the result to numeric types submit
6365
- ``numpy.number``. To limit it instead to categorical
6366
- objects submit the ``numpy.object`` data type. Strings
6365
+ ``numpy.number``. To limit it instead to object columns submit
6366
+ the ``numpy.object`` data type. Strings
6367
6367
can also be used in the style of
6368
- ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
6368
+ ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
6369
+ select pandas categorical columns, use ``'category'``.
6369
6370
- None (default) : The result will include all numeric columns.
6370
6371
exclude : list-like of dtypes or None (default), optional
6371
6372
A black list of data types to omit from the result. Ignored
6372
6373
for ``Series``. Here are the options:
6373
6374
6374
6375
- A list-like of dtypes : Excludes the provided data types
6375
- from the result. To select numeric types submit
6376
- ``numpy.number``. To select categorical objects submit the data
6376
+ from the result. To exclude numeric types submit
6377
+ ``numpy.number``. To exclude object columns submit the data
6377
6378
type ``numpy.object``. Strings can also be used in the style of
6378
- ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
6379
+ ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
6380
+ exclude pandas categorical columns, use ``'category'``.
6379
6381
- None (default) : The result will exclude nothing.
6380
6382
6381
6383
Returns
@@ -6400,9 +6402,11 @@ def describe(self, percentiles=None, include=None, exclude=None):
6400
6402
among those with the highest count.
6401
6403
6402
6404
For mixed data types provided via a ``DataFrame``, the default is to
6403
- return only an analysis of numeric columns. If ``include='all'``
6404
- is provided as an option, the result will include a union of
6405
- attributes of each type.
6405
+ return only an analysis of numeric columns. If the dataframe consists
6406
+ only of object and categorical data without any numeric columns, the
6407
+ default is to return an analysis of both the object and categorical
6408
+ columns. If ``include='all'`` is provided as an option, the result
6409
+ will include a union of attributes of each type.
6406
6410
6407
6411
The `include` and `exclude` parameters can be used to limit
6408
6412
which columns in a ``DataFrame`` are analyzed for the output.
@@ -6452,8 +6456,10 @@ def describe(self, percentiles=None, include=None, exclude=None):
6452
6456
Describing a ``DataFrame``. By default only numeric fields
6453
6457
are returned.
6454
6458
6455
- >>> df = pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c']],
6456
- ... columns=['numeric', 'object'])
6459
+ >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'],
6460
+ ... 'numeric': [1, 2, 3],
6461
+ ... 'categorical': pd.Categorical(['d','e','f'])
6462
+ ... })
6457
6463
>>> df.describe()
6458
6464
numeric
6459
6465
count 3.0
@@ -6468,18 +6474,18 @@ def describe(self, percentiles=None, include=None, exclude=None):
6468
6474
Describing all columns of a ``DataFrame`` regardless of data type.
6469
6475
6470
6476
>>> df.describe(include='all')
6471
- numeric object
6472
- count 3.0 3
6473
- unique NaN 3
6474
- top NaN b
6475
- freq NaN 1
6476
- mean 2.0 NaN
6477
- std 1.0 NaN
6478
- min 1.0 NaN
6479
- 25% 1.5 NaN
6480
- 50% 2.0 NaN
6481
- 75% 2.5 NaN
6482
- max 3.0 NaN
6477
+ categorical numeric object
6478
+ count 3 3.0 3
6479
+ unique 3 NaN 3
6480
+ top f NaN c
6481
+ freq 1 NaN 1
6482
+ mean NaN 2.0 NaN
6483
+ std NaN 1.0 NaN
6484
+ min NaN 1.0 NaN
6485
+ 25% NaN 1.5 NaN
6486
+ 50% NaN 2.0 NaN
6487
+ 75% NaN 2.5 NaN
6488
+ max NaN 3.0 NaN
6483
6489
6484
6490
Describing a column from a ``DataFrame`` by accessing it as
6485
6491
an attribute.
@@ -6514,30 +6520,42 @@ def describe(self, percentiles=None, include=None, exclude=None):
6514
6520
object
6515
6521
count 3
6516
6522
unique 3
6517
- top b
6523
+ top c
6518
6524
freq 1
6519
6525
6526
+ Including only categorical columns from a ``DataFrame`` description.
6527
+
6528
+ >>> df.describe(include=['category'])
6529
+ categorical
6530
+ count 3
6531
+ unique 3
6532
+ top f
6533
+ freq 1
6534
+
6520
6535
Excluding numeric columns from a ``DataFrame`` description.
6521
6536
6522
6537
>>> df.describe(exclude=[np.number])
6523
- object
6524
- count 3
6525
- unique 3
6526
- top b
6527
- freq 1
6538
+ categorical object
6539
+ count 3 3
6540
+ unique 3 3
6541
+ top f c
6542
+ freq 1 1
6528
6543
6529
6544
Excluding object columns from a ``DataFrame`` description.
6530
6545
6531
6546
>>> df.describe(exclude=[np.object])
6532
- numeric
6533
- count 3.0
6534
- mean 2.0
6535
- std 1.0
6536
- min 1.0
6537
- 25% 1.5
6538
- 50% 2.0
6539
- 75% 2.5
6540
- max 3.0
6547
+ categorical numeric
6548
+ count 3 3.0
6549
+ unique 3 NaN
6550
+ top f NaN
6551
+ freq 1 NaN
6552
+ mean NaN 2.0
6553
+ std NaN 1.0
6554
+ min NaN 1.0
6555
+ 25% NaN 1.5
6556
+ 50% NaN 2.0
6557
+ 75% NaN 2.5
6558
+ max NaN 3.0
6541
6559
6542
6560
See Also
6543
6561
--------
0 commit comments