@@ -6352,20 +6352,22 @@ def describe(self, percentiles=None, include=None, exclude=None):
6352
6352
- A list-like of dtypes : Limits the results to the
6353
6353
provided data types.
6354
6354
To limit the result to numeric types submit
6355
- ``numpy.number``. To limit it instead to categorical
6356
- objects submit the ``numpy.object`` data type. Strings
6355
+ ``numpy.number``. To limit it instead to object columns submit
6356
+ the ``numpy.object`` data type. Strings
6357
6357
can also be used in the style of
6358
- ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
6358
+ ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
6359
+ select pandas categorical columns, use ``'category'``.
6359
6360
- None (default) : The result will include all numeric columns.
6360
6361
exclude : list-like of dtypes or None (default), optional,
6361
6362
A black list of data types to omit from the result. Ignored
6362
6363
for ``Series``. Here are the options:
6363
6364
6364
6365
- A list-like of dtypes : Excludes the provided data types
6365
- from the result. To select numeric types submit
6366
- ``numpy.number``. To select categorical objects submit the data
6366
+ from the result. To exclude numeric types submit
6367
+ ``numpy.number``. To exclude object columns submit the data
6367
6368
type ``numpy.object``. Strings can also be used in the style of
6368
- ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
6369
+ ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
6370
+ exclude pandas categorical columns, use ``'category'``.
6369
6371
- None (default) : The result will exclude nothing.
6370
6372
6371
6373
Returns
@@ -6390,9 +6392,11 @@ def describe(self, percentiles=None, include=None, exclude=None):
6390
6392
among those with the highest count.
6391
6393
6392
6394
For mixed data types provided via a ``DataFrame``, the default is to
6393
- return only an analysis of numeric columns. If ``include='all'``
6394
- is provided as an option, the result will include a union of
6395
- attributes of each type.
6395
+ return only an analysis of numeric columns. If the ``DataFrame`` consists
6396
+ only of object and categorical data without any numeric columns, the
6397
+ default is to return an analysis of both the object and categorical
6398
+ columns. If ``include='all'`` is provided as an option, the result
6399
+ will include a union of attributes of each type.
6396
6400
6397
6401
The `include` and `exclude` parameters can be used to limit
6398
6402
which columns in a ``DataFrame`` are analyzed for the output.
@@ -6442,8 +6446,10 @@ def describe(self, percentiles=None, include=None, exclude=None):
6442
6446
Describing a ``DataFrame``. By default only numeric fields
6443
6447
are returned.
6444
6448
6445
- >>> df = pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c']],
6446
- ... columns=['numeric', 'object'])
6449
+ >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'],
6450
+ ... 'numeric': [1, 2, 3],
6451
+ ... 'categorical': pd.Categorical(['d','e','f'])
6452
+ ... })
6447
6453
>>> df.describe()
6448
6454
numeric
6449
6455
count 3.0
@@ -6457,19 +6463,19 @@ def describe(self, percentiles=None, include=None, exclude=None):
6457
6463
6458
6464
Describing all columns of a ``DataFrame`` regardless of data type.
6459
6465
6460
- >>> df.describe(include='all')
6461
- numeric object
6462
- count 3.0 3
6463
- unique NaN 3
6464
- top NaN b
6465
- freq NaN 1
6466
- mean 2.0 NaN
6467
- std 1.0 NaN
6468
- min 1.0 NaN
6469
- 25% 1.5 NaN
6470
- 50% 2.0 NaN
6471
- 75% 2.5 NaN
6472
- max 3.0 NaN
6466
+ >>> df.describe(include='all')
6467
+ categorical numeric object
6468
+ count 3 3.0 3
6469
+ unique 3 NaN 3
6470
+ top f NaN c
6471
+ freq 1 NaN 1
6472
+ mean NaN 2.0 NaN
6473
+ std NaN 1.0 NaN
6474
+ min NaN 1.0 NaN
6475
+ 25% NaN 1.5 NaN
6476
+ 50% NaN 2.0 NaN
6477
+ 75% NaN 2.5 NaN
6478
+ max NaN 3.0 NaN
6473
6479
6474
6480
Describing a column from a ``DataFrame`` by accessing it as
6475
6481
an attribute.
@@ -6504,31 +6510,43 @@ def describe(self, percentiles=None, include=None, exclude=None):
6504
6510
object
6505
6511
count 3
6506
6512
unique 3
6507
- top b
6513
+ top c
6508
6514
freq 1
6509
6515
6516
+ Including only categorical columns from a ``DataFrame`` description.
6517
+
6518
+ >>> df.describe(include=['category'])
6519
+ categorical
6520
+ count 3
6521
+ unique 3
6522
+ top f
6523
+ freq 1
6524
+
6510
6525
Excluding numeric columns from a ``DataFrame`` description.
6511
6526
6512
6527
>>> df.describe(exclude=[np.number])
6513
- object
6514
- count 3
6515
- unique 3
6516
- top b
6517
- freq 1
6528
+ categorical object
6529
+ count 3 3
6530
+ unique 3 3
6531
+ top f c
6532
+ freq 1 1
6518
6533
6519
6534
Excluding object columns from a ``DataFrame`` description.
6520
6535
6521
6536
>>> df.describe(exclude=[np.object])
6522
- numeric
6523
- count 3.0
6524
- mean 2.0
6525
- std 1.0
6526
- min 1.0
6527
- 25% 1.5
6528
- 50% 2.0
6529
- 75% 2.5
6530
- max 3.0
6531
-
6537
+ categorical numeric
6538
+ count 3 3.0
6539
+ unique 3 NaN
6540
+ top f NaN
6541
+ freq 1 NaN
6542
+ mean NaN 2.0
6543
+ std NaN 1.0
6544
+ min NaN 1.0
6545
+ 25% NaN 1.5
6546
+ 50% NaN 2.0
6547
+ 75% NaN 2.5
6548
+ max NaN 3.0
6549
+
6532
6550
See Also
6533
6551
--------
6534
6552
DataFrame.count
0 commit comments