From 087f4416be069d4f7fc48247917e277744fd139b Mon Sep 17 00:00:00 2001 From: Gioia Ballin Date: Sat, 10 Mar 2018 16:03:08 +0000 Subject: [PATCH 1/2] Updates the documentation for pandas.DataFrame.to_sparse. --- pandas/core/frame.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e57579ddfc05..60e1a21771a11 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1570,16 +1570,44 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, def to_sparse(self, fill_value=None, kind='block'): """ - Convert to SparseDataFrame + Convert to SparseDataFrame. + + Implement the sparse version of the DataFrame meaning that any data + matching a specific value is omitted in the representation. + The sparse DataFrame allows for more efficient storage. Parameters ---------- fill_value : float, default NaN + The specific value that should be omitted in the representation. kind : {'block', 'integer'} + The kind of the SparseIndex tracking where data is not equal to + the fill value: + + - 'block' tracks only the locations and sizes of blocks of data; + - 'integer' keeps an array with all the locations of the data. + + The kind 'block' is recommended since it's more memory efficient. Returns ------- y : SparseDataFrame + + See Also + -------- + DataFrame.to_dense : + converts the DataFrame back to the its dense form + + Examples + -------- + + Compressing on the zero value. + + >>> df = pd.DataFrame(np.random.randn(1000, 4)) + >>> df.iloc[:995] = 0. + >>> sdf = df.to_sparse(fill_value=0.) 
+ >>> sdf.density + 0.005 """ from pandas.core.sparse.frame import SparseDataFrame return SparseDataFrame(self._series, index=self.index, From dcdf0bbcfe750d3562b7bf160674aa4bbcab74ca Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 9 Jul 2018 22:48:02 -0500 Subject: [PATCH 2/2] Minor fixes and adding more real world examples --- pandas/core/frame.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 761745f587a28..64edb8c3848d0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1607,36 +1607,49 @@ def to_sparse(self, fill_value=None, kind='block'): Parameters ---------- - fill_value : float, default NaN + fill_value : float, default None The specific value that should be omitted in the representation. - kind : {'block', 'integer'} + kind : {'block', 'integer'}, default 'block' The kind of the SparseIndex tracking where data is not equal to the fill value: - - 'block' tracks only the locations and sizes of blocks of data; + - 'block' tracks only the locations and sizes of blocks of data. - 'integer' keeps an array with all the locations of the data. - The kind 'block' is recommended since it's more memory efficient. + In most cases 'block' is recommended, since it's more memory + efficient. Returns ------- - y : SparseDataFrame + SparseDataFrame + The sparse representation of the DataFrame. See Also -------- DataFrame.to_dense : - converts the DataFrame back to the its dense form + Converts the DataFrame back to its dense form. Examples -------- + >>> df = pd.DataFrame([(np.nan, np.nan), + ... (1., np.nan), + ... (np.nan, 1.)]) + >>> df + 0 1 + 0 NaN NaN + 1 1.0 NaN + 2 NaN 1.0 + >>> type(df) + <class 'pandas.core.frame.DataFrame'> - Compressing on the zero value. - - >>> df = pd.DataFrame(np.random.randn(1000, 4)) - >>> df.iloc[:995] = 0. - >>> sdf = df.to_sparse(fill_value=0.) 
- >>> sdf.density - 0.005 + >>> sdf = df.to_sparse() + >>> sdf + 0 1 + 0 NaN NaN + 1 1.0 NaN + 2 NaN 1.0 + >>> type(sdf) + <class 'pandas.core.sparse.frame.SparseDataFrame'> """ from pandas.core.sparse.frame import SparseDataFrame return SparseDataFrame(self._series, index=self.index,