|
17 | 17 | from pandas.errors import PerformanceWarning
|
18 | 18 | from pandas.compat.numpy import function as nv
|
19 | 19 |
|
| 20 | +from pandas.core.accessor import PandasDelegate, delegate_names |
20 | 21 | from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
|
21 | 22 | import pandas.core.common as com
|
22 | 23 | from pandas.core.dtypes.base import ExtensionDtype
|
@@ -178,6 +179,7 @@ def _is_boolean(self):
|
178 | 179 |
|
@property
def kind(self):
    """
    The ``kind`` attribute of this dtype's ``subtype``.

    NOTE(review): the previous docstring described this as the sparse
    index kind ('integer' or 'block'), but the value is read straight
    from ``self.subtype``. Presumably ``subtype`` is a NumPy dtype, whose
    ``kind`` is a one-character type code -- confirm.
    """
    return self.subtype.kind
|
182 | 184 |
|
183 | 185 | @property
|
@@ -648,10 +650,22 @@ def _from_factorized(cls, values, original):
|
648 | 650 | # ------------------------------------------------------------------------
|
@property
def sp_index(self):
    """
    The SparseIndex recording where the non- ``fill_value`` points are.
    """
    index = self._sparse_index
    return index
|
652 | 657 |
|
@property
def sp_values(self):
    """
    An ndarray holding only the values that differ from ``fill_value``.

    Examples
    --------
    >>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0)
    >>> s.sp_values
    array([1, 2])
    """
    stored = self._sparse_values
    return stored
|
656 | 670 |
|
657 | 671 | @property
|
@@ -704,6 +718,31 @@ def _fill_value_matches(self, fill_value):
|
704 | 718 | def nbytes(self):
|
705 | 719 | return self.sp_values.nbytes + self.sp_index.nbytes
|
706 | 720 |
|
@property
def density(self):
    """
    The fraction of points that are not ``fill_value``.

    Computed as the sparse index's ``npoints`` divided by its ``length``
    and returned as a float (a decimal fraction, not scaled to 100).

    Examples
    --------
    >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0)
    >>> s.density
    0.6
    """
    index = self.sp_index
    stored = float(index.npoints)
    total = float(index.length)
    return stored / total
| 733 | + |
@property
def npoints(self):
    """
    The count of points that are not ``fill_value``.

    This is the ``npoints`` attribute of the underlying sparse index.

    Examples
    --------
    >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0)
    >>> s.npoints
    3
    """
    index = self.sp_index
    return index.npoints
| 745 | + |
707 | 746 | @property
|
708 | 747 | def values(self):
|
709 | 748 | """
|
@@ -1744,3 +1783,138 @@ def _make_index(length, indices, kind):
|
1744 | 1783 | else: # pragma: no cover
|
1745 | 1784 | raise ValueError('must be block or integer type')
|
1746 | 1785 | return index
|
| 1786 | + |
| 1787 | + |
| 1788 | +# ---------------------------------------------------------------------------- |
| 1789 | +# Accessor |
| 1790 | + |
@delegate_names(SparseArray, ['npoints', 'density', 'fill_value',
                              'sp_values'],
                typ='property')
class SparseAccessor(PandasDelegate):
    """
    Accessor for sparse-specific properties and scipy conversions.

    The delegated properties (``npoints``, ``density``, ``fill_value`` and
    ``sp_values``) are looked up on ``.values`` of the wrapped object.
    Construction fails with ``AttributeError`` unless the wrapped object's
    dtype is a ``SparseDtype``.
    """

    def __init__(self, data=None):
        self._validate(data)
        # Store the Series since we need that for to_coo
        self._parent = data

    @staticmethod
    def _validate(data):
        """Raise AttributeError unless ``data.dtype`` is a SparseDtype."""
        # ``getattr`` with a default so that an object without a ``dtype``
        # (including the ``None`` default of ``__init__``) gets the same
        # explanatory error as non-sparse data instead of an unrelated
        # "'NoneType' object has no attribute 'dtype'".
        if not isinstance(getattr(data, 'dtype', None), SparseDtype):
            msg = "Can only use the '.sparse' accessor with Sparse data."
            raise AttributeError(msg)

    def _delegate_property_get(self, name, *args, **kwargs):
        # Delegated properties live on the array backing the parent.
        return getattr(self._parent.values, name)

    def _delegate_method(self, name, *args, **kwargs):
        if name == 'from_coo':
            return self.from_coo(*args, **kwargs)
        elif name == 'to_coo':
            return self.to_coo(*args, **kwargs)
        else:
            # Same exception type as before, now naming the offender.
            raise ValueError(
                "'{name}' is not a method delegated by the sparse "
                "accessor".format(name=name))

    @classmethod
    def from_coo(cls, A, dense_index=False):
        """
        Create a Series with sparse values from a scipy.sparse.coo_matrix.

        Parameters
        ----------
        A : scipy.sparse.coo_matrix
        dense_index : bool, default False
            If False (default), the index of the result consists of only the
            coords of the non-null entries of the original coo_matrix.
            If True, the index of the result consists of the full sorted
            (row, col) coordinates of the coo_matrix.

        Returns
        -------
        s : Series
            A Series with sparse values.

        Examples
        --------
        >>> from scipy import sparse
        >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])),
        ...                       shape=(3, 4))
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  2.],
                [ 3.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  0.]])
        >>> ss = pd.Series.sparse.from_coo(A)
        >>> ss
        0  2    1.0
           3    2.0
        1  0    3.0
        dtype: Sparse[float64, nan]
        """
        from pandas.core.sparse.scipy_sparse import _coo_to_sparse_series
        from pandas import Series

        result = _coo_to_sparse_series(A, dense_index=dense_index)
        # SparseSeries -> Series[sparse]
        result = Series(result.values, index=result.index, copy=False)

        return result

    def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
        """
        Create a scipy.sparse.coo_matrix from a Series with MultiIndex.

        Use row_levels and column_levels to determine the row and column
        coordinates respectively. row_levels and column_levels are the names
        (labels) or numbers of the levels. {row_levels, column_levels} must be
        a partition of the MultiIndex level names (or numbers).

        Parameters
        ----------
        row_levels : tuple/list
        column_levels : tuple/list
        sort_labels : bool, default False
            Sort the row and column labels before forming the sparse matrix.

        Returns
        -------
        y : scipy.sparse.coo_matrix
        rows : list (row labels)
        columns : list (column labels)

        Examples
        --------
        >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
        >>> s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0),
        ...                                      (1, 2, 'a', 1),
        ...                                      (1, 1, 'b', 0),
        ...                                      (1, 1, 'b', 1),
        ...                                      (2, 1, 'b', 0),
        ...                                      (2, 1, 'b', 1)],
        ...                                     names=['A', 'B', 'C', 'D'])
        >>> ss = s.to_sparse()
        >>> A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'],
        ...                                     column_levels=['C', 'D'],
        ...                                     sort_labels=True)
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  3.],
                [ 3.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  0.]])
        >>> rows
        [(1, 1), (1, 2), (2, 1)]
        >>> columns
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
        """
        from pandas.core.sparse.scipy_sparse import _sparse_series_to_coo

        A, rows, columns = _sparse_series_to_coo(self._parent,
                                                 row_levels,
                                                 column_levels,
                                                 sort_labels=sort_labels)
        return A, rows, columns
0 commit comments