diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..7092887975727 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2432,8 +2432,8 @@ def eval(self, expr, inplace=False, **kwargs): return _eval(expr, inplace=inplace, **kwargs) def select_dtypes(self, include=None, exclude=None): - """Return a subset of a DataFrame including/excluding columns based on - their ``dtype``. + """ + Return a subset of the DataFrame's columns based on the column dtypes. Parameters ---------- @@ -2471,25 +2471,27 @@ def select_dtypes(self, include=None, exclude=None): Examples -------- - >>> df = pd.DataFrame({'a': np.random.randn(6).astype('f4'), + >>> df = pd.DataFrame({'a': [1, 2] * 3, ... 'b': [True, False] * 3, ... 'c': [1.0, 2.0] * 3}) >>> df a b c - 0 0.3962 True 1.0 - 1 0.1459 False 2.0 - 2 0.2623 True 1.0 - 3 0.0764 False 2.0 - 4 -0.9703 True 1.0 - 5 -1.2094 False 2.0 + 0 1 True 1.0 + 1 2 False 2.0 + 2 1 True 1.0 + 3 2 False 2.0 + 4 1 True 1.0 + 5 2 False 2.0 + >>> df.select_dtypes(include='bool') - c + b 0 True 1 False 2 True 3 False 4 True 5 False + >>> df.select_dtypes(include=['float64']) c 0 1.0 @@ -2498,14 +2500,15 @@ def select_dtypes(self, include=None, exclude=None): 3 2.0 4 1.0 5 2.0 - >>> df.select_dtypes(exclude=['floating']) - b - 0 True - 1 False - 2 True - 3 False - 4 True - 5 False + + >>> df.select_dtypes(exclude=['int']) + b c + 0 True 1.0 + 1 False 2.0 + 2 True 1.0 + 3 False 2.0 + 4 True 1.0 + 5 False 2.0 """ if not is_list_like(include): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a893b2ba1a189..0074665505fee 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4260,16 +4260,116 @@ def _get_values(self): return self.values def get_values(self): - """same as values (but handles sparseness conversions)""" + """ + Return an ndarray after converting sparse values to dense. + + This is the same as ``.values`` for non-sparse data. 
For sparse + data contained in a `pandas.SparseArray`, the data are first + converted to a dense representation. + + Returns + ------- + numpy.ndarray + NumPy representation of DataFrame. + + See Also + -------- + values : NumPy representation of DataFrame. + pandas.SparseArray : Container for sparse data. + + Examples + -------- + >>> df = pd.DataFrame({'a': [1, 2], 'b': [True, False], + ... 'c': [1.0, 2.0]}) + >>> df + a b c + 0 1 True 1.0 + 1 2 False 2.0 + + >>> df.get_values() + array([[1, True, 1.0], [2, False, 2.0]], dtype=object) + + >>> df = pd.DataFrame({"a": pd.SparseArray([1, None, None]), + ... "c": [1.0, 2.0, 3.0]}) + >>> df + a c + 0 1.0 1.0 + 1 NaN 2.0 + 2 NaN 3.0 + + >>> df.get_values() + array([[ 1., 1.], + [nan, 2.], + [nan, 3.]]) + """ return self.values def get_dtype_counts(self): - """Return the counts of dtypes in this object.""" + """ + Return counts of unique dtypes in this object. + + Returns + ------- + dtype : Series + Series with the count of columns with each dtype. + + See Also + -------- + dtypes : Return the dtypes in this object. + + Examples + -------- + >>> a = [['a', 1, 1.0], ['b', 2, 2.0], ['c', 3, 3.0]] + >>> df = pd.DataFrame(a, columns=['str', 'int', 'float']) + >>> df + str int float + 0 a 1 1.0 + 1 b 2 2.0 + 2 c 3 3.0 + + >>> df.get_dtype_counts() + float64 1 + int64 1 + object 1 + dtype: int64 + """ from pandas import Series return Series(self._data.get_dtype_counts()) def get_ftype_counts(self): - """Return the counts of ftypes in this object.""" + """ + Return counts of unique ftypes in this object. + + This is useful for SparseDataFrame or for DataFrames containing + sparse arrays. + + Returns + ------- + dtype : Series + Series with the count of columns with each type and + sparsity (dense/sparse). + + See Also + -------- + ftypes : Return ftypes (indication of sparse/dense and dtype) in + this object.
+ + Examples + -------- + >>> a = [['a', 1, 1.0], ['b', 2, 2.0], ['c', 3, 3.0]] + >>> df = pd.DataFrame(a, columns=['str', 'int', 'float']) + >>> df + str int float + 0 a 1 1.0 + 1 b 2 2.0 + 2 c 3 3.0 + + >>> df.get_ftype_counts() + float64:dense 1 + int64:dense 1 + object:dense 1 + dtype: int64 + """ from pandas import Series return Series(self._data.get_ftype_counts())