From ae676b88e414ea0ae79bbc789ab76319498d7830 Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Sun, 25 Feb 2018 10:24:50 +0100 Subject: [PATCH 1/9] Remove Series._from_array Transfer its check to DataFrame._box_col_values --- pandas/core/frame.py | 15 +++++++++++---- pandas/core/series.py | 21 ++------------------- pandas/core/sparse/series.py | 6 ------ 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 061b69f25e7ac..a529446270bb8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2166,8 +2166,7 @@ def _ixs(self, i, axis=0): if index_len and not len(values): values = np.array([np.nan] * index_len, dtype=object) - result = self._constructor_sliced._from_array( - values, index=self.index, name=label, fastpath=True) + result = self._box_col_values(values, label) # this is a cached value, mark it so result._set_as_cached(label, self) @@ -2563,8 +2562,16 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - return self._constructor_sliced._from_array(values, index=self.index, - name=items, fastpath=True) + # This check here was previously performed in Series._from_array + # By doing it here there is no need for that function anymore + # GH#19883. 
+ from pandas.core.dtypes.generic import ABCSparseArray + this_constructor_sliced = self._constructor_sliced + if isinstance(values, ABCSparseArray): + from pandas.core.sparse.series import SparseSeries + this_constructor_sliced = SparseSeries + return this_constructor_sliced(values, index=self.index, + name=items, fastpath=True) def __setitem__(self, key, value): key = com._apply_if_callable(key, self) diff --git a/pandas/core/series.py b/pandas/core/series.py index b42e02bc99237..06016ee183f35 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -305,25 +305,8 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.Series(..) " "constructor instead.", FutureWarning, stacklevel=2) - return cls._from_array(arr, index=index, name=name, dtype=dtype, - copy=copy, fastpath=fastpath) - - @classmethod - def _from_array(cls, arr, index=None, name=None, dtype=None, copy=False, - fastpath=False): - """ - Internal method used in DataFrame.__setitem__/__getitem__. - Difference with Series(..) is that this method checks if a sparse - array is passed. - - """ - # return a sparse series here - if isinstance(arr, ABCSparseArray): - from pandas.core.sparse.series import SparseSeries - cls = SparseSeries - - return cls(arr, index=index, name=name, dtype=dtype, copy=copy, - fastpath=fastpath) + return cls(arr, index=index, name=name, dtype=dtype, + copy=copy, fastpath=fastpath) @property def _constructor(self): diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 26cf9dbadbbf2..3a6b87f832ff7 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -216,12 +216,6 @@ def from_array(cls, arr, index=None, name=None, copy=False, warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.SparseSeries(..) 
" "constructor instead.", FutureWarning, stacklevel=2) - return cls._from_array(arr, index=index, name=name, copy=copy, - fill_value=fill_value, fastpath=fastpath) - - @classmethod - def _from_array(cls, arr, index=None, name=None, copy=False, - fill_value=None, fastpath=False): return cls(arr, index=index, name=name, copy=copy, fill_value=fill_value, fastpath=fastpath) From 0c67e5a085fed7fbd008735397c457e4ad143d2b Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Sun, 25 Feb 2018 10:33:54 +0100 Subject: [PATCH 2/9] keep original function of Series.from_array until is properly deleted --- pandas/core/series.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 06016ee183f35..e7e0959b42c61 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -305,6 +305,9 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.Series(..) " "constructor instead.", FutureWarning, stacklevel=2) + if isinstance(values, ABCSparseArray): + from pandas.core.sparse.series import SparseSeries + cls = SparseSeries return cls(arr, index=index, name=name, dtype=dtype, copy=copy, fastpath=fastpath) From 3d6a4dd3ede7c96f1e837c387918bb03ac93ef08 Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Sun, 25 Feb 2018 10:35:51 +0100 Subject: [PATCH 3/9] variable name error fix --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e7e0959b42c61..17f91606c1062 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -305,7 +305,7 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.Series(..) 
" "constructor instead.", FutureWarning, stacklevel=2) - if isinstance(values, ABCSparseArray): + if isinstance(arr, ABCSparseArray): from pandas.core.sparse.series import SparseSeries cls = SparseSeries return cls(arr, index=index, name=name, dtype=dtype, From e27554ca6e397a18b638efc30b8e5fcecfbe96be Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Sun, 25 Feb 2018 12:08:58 +0100 Subject: [PATCH 4/9] add test_frame_subclassing_and_inherit --- pandas/tests/frame/test_subclass.py | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index caaa311e9ee96..982eed82f0fb9 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -570,3 +570,59 @@ def strech(row): result = df.apply(lambda x: [1, 2, 3], axis=1) assert not isinstance(result, tm.SubclassedDataFrame) tm.assert_series_equal(result, expected) + + def test_frame_subclassing_and_inherit(self): + # Subclass frame and series and ensure that data can be transfered between them + # on slicing GH#19883 + + class CustomSeries(Series): + + def __init__(self, *args, **kw): + super(CustomSeries, self).__init__(*args, **kw) + self.extra_data = None + + @property + def _constructor(self): + return CustomSeries + + def __finalize__(self, other, method=None, **kwargs): + if method == "_inherit": + self.extra_data = other.extra_data + return self + + class CustomDataFrame(DataFrame): + """ + Subclasses pandas DF, fills DF with simulation results, adds some + custom temporal data. 
+ """ + + def __init__(self, *args, **kw): + super(CustomDataFrame, self).__init__(*args, **kw) + self.extra_data = None + + @property + def _constructor(self): + return CustomDataFrame + + @property + def _constructor_sliced(self): + def f(*args, **kwargs): + return CustomSeries(*args, **kwargs).__finalize__(self, method='_inherit') + return f + + data = {'col1': range(10), + 'col2': range(10)} + cdf = CustomDataFrame(data) + cdf.extra_data = range(3) + + # column + cdf_series = cdf.col1 + assert cdf_series.extra_data == cdf.extra_data + + # row + cdf_series = cdf.iloc[0] + assert cdf_series.extra_data == cdf.extra_data + + + + From c64392202c76cee8680f08fcd2b75b829e5d90da Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Sun, 25 Feb 2018 12:41:49 +0100 Subject: [PATCH 5/9] pep8 fixes --- pandas/tests/frame/test_subclass.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 982eed82f0fb9..e792418592960 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -572,8 +572,8 @@ def strech(row): tm.assert_series_equal(result, expected) def test_frame_subclassing_and_inherit(self): - # Subclass frame and series and ensure that data can be transfered between them - # on slicing GH#19883 + # Subclass frame and series and ensure that data can be transfered + # between them on slicing GH#19883 class CustomSeries(Series): @@ -607,7 +607,8 @@ def _constructor(self): @property def _constructor_sliced(self): def f(*args, **kwargs): - return CustomSeries(*args, **kwargs).__finalize__(self, method='_inherit') + return CustomSeries(*args, **kwargs).__finalize__( + self, method='_inherit') return f data = {'col1': range(10), @@ -622,7 +623,3 @@ def f(*args, **kwargs): # row cdf_series = cdf.iloc[0] assert cdf_series.extra_data == cdf.extra_data - - - - From 545de1eeee029ae187df4101ac356fa2efec941e Mon Sep 17 00:00:00 2001 From: Jaume Bonet 
Date: Sun, 25 Feb 2018 19:17:41 +0100 Subject: [PATCH 6/9] _get_sliced_frame_result_type to check type of DataFrame._constructor_sliced --- pandas/core/dtypes/concat.py | 12 +++++++ pandas/core/frame.py | 10 ++---- pandas/tests/frame/test_subclass.py | 53 ----------------------------- 3 files changed, 14 insertions(+), 61 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d306d0d78f1f4..cc6246084f91d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -101,6 +101,18 @@ def _get_frame_result_type(result, objs): ABCSparseDataFrame)) +def _get_sliced_frame_result_type(data, obj): + """ + return appropriate class of Series depending on whether + the data is sparse or not. + """ + if is_sparse(data): + from pandas.core.sparse.api import SparseSeries + return SparseSeries + else: + return obj._constructor_sliced + + def _concat_compat(to_concat, axis=0): """ provide concatenation of an array of arrays each of which is a single diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a529446270bb8..776ccc44a63f9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2562,14 +2562,8 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - # This check here was previously performed in Series._from_array - # By doing it here there is no need for that function anymore - # GH#19883. 
- from pandas.core.dtypes.generic import ABCSparseArray - this_constructor_sliced = self._constructor_sliced - if isinstance(values, ABCSparseArray): - from pandas.core.sparse.series import SparseSeries - this_constructor_sliced = SparseSeries + from pandas.core.dtypes.concat import _get_sliced_frame_result_type + this_constructor_sliced = _get_sliced_frame_result_type(values, self) return this_constructor_sliced(values, index=self.index, name=items, fastpath=True) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index e792418592960..caaa311e9ee96 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -570,56 +570,3 @@ def strech(row): result = df.apply(lambda x: [1, 2, 3], axis=1) assert not isinstance(result, tm.SubclassedDataFrame) tm.assert_series_equal(result, expected) - - def test_frame_subclassing_and_inherit(self): - # Subclass frame and series and ensure that data can be transfered - # between them on slicing GH#19883 - - class CustomSeries(Series): - - def __init__(self, *args, **kw): - super(CustomSeries, self).__init__(*args, **kw) - self.extra_data = None - - @property - def _constructor(self): - return CustomSeries - - def __finalize__(self, other, method=None, **kwargs): - if method == "_inherit": - self.extra_data = other.extra_data - return self - - class CustomDataFrame(DataFrame): - """ - Subclasses pandas DF, fills DF with simulation results, adds some - custom temporal data. 
- """ - - def __init__(self, *args, **kw): - super(CustomDataFrame, self).__init__(*args, **kw) - self.extra_data = None - - @property - def _constructor(self): - return CustomDataFrame - - @property - def _constructor_sliced(self): - def f(*args, **kwargs): - return CustomSeries(*args, **kwargs).__finalize__( - self, method='_inherit') - return f - - data = {'col1': range(10), - 'col2': range(10)} - cdf = CustomDataFrame(data) - cdf.extra_data = range(3) - - # column - cdf_series = cdf.col1 - assert cdf_series.extra_data == cdf.extra_data - - # row - cdf_series = cdf.iloc[0] - assert cdf_series.extra_data == cdf.extra_data From f000fdb9bb03c6fc15ea038db69833557ca4fcdc Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Mon, 26 Feb 2018 10:21:35 +0100 Subject: [PATCH 7/9] style fixes --- pandas/core/dtypes/concat.py | 17 +++++++++++++---- pandas/core/frame.py | 7 +++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index cc6246084f91d..86e663b403db9 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -103,14 +103,23 @@ def _get_frame_result_type(result, objs): def _get_sliced_frame_result_type(data, obj): """ - return appropriate class of Series depending on whether - the data is sparse or not. + return appropriate class of Series. When data is sparse + it will return a SparseSeries, otherwise it will return + the Series. 
+ + Parameters + ---------- + data : ndarray + obj : DataFrame + + Returns + ------- + Series or SparseSeries """ if is_sparse(data): from pandas.core.sparse.api import SparseSeries return SparseSeries - else: - return obj._constructor_sliced + return obj._constructor_sliced def _concat_compat(to_concat, axis=0): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 776ccc44a63f9..5e9c7d5e4e699 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -60,6 +60,7 @@ is_iterator, is_sequence, is_named_tuple) +from pandas.core.dtypes.concat import _get_sliced_frame_result_type from pandas.core.dtypes.missing import isna, notna @@ -2562,10 +2563,8 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - from pandas.core.dtypes.concat import _get_sliced_frame_result_type - this_constructor_sliced = _get_sliced_frame_result_type(values, self) - return this_constructor_sliced(values, index=self.index, - name=items, fastpath=True) + klass = _get_sliced_frame_result_type(values, self) + return klass(values, index=self.index, name=items, fastpath=True) def __setitem__(self, key, value): key = com._apply_if_callable(key, self) From 356b02b1b166d8d5d147f13b443eaf24da314eb8 Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Mon, 26 Feb 2018 14:39:05 +0100 Subject: [PATCH 8/9] style updates --- pandas/core/dtypes/concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 86e663b403db9..49eb2e7007325 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -109,7 +109,7 @@ def _get_sliced_frame_result_type(data, obj): Parameters ---------- - data : ndarray + data : array-like obj : DataFrame Returns @@ -117,7 +117,7 @@ def _get_sliced_frame_result_type(data, obj): Series or SparseSeries """ if is_sparse(data): - from pandas.core.sparse.api import SparseSeries + from pandas import 
SparseSeries return SparseSeries return obj._constructor_sliced From e4ec9874071998b1d013af0ed31ffd33574ad549 Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Mon, 26 Feb 2018 15:55:07 +0100 Subject: [PATCH 9/9] roll back import in _get_sliced_frame_result_type --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 49eb2e7007325..0501493e718d0 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -117,7 +117,7 @@ def _get_sliced_frame_result_type(data, obj): Series or SparseSeries """ if is_sparse(data): - from pandas import SparseSeries + from pandas.core.sparse.api import SparseSeries return SparseSeries return obj._constructor_sliced