From ee696fcf0e4a6c924f85495d25589d86f58263a6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 14:56:35 -0800 Subject: [PATCH 1/6] missing whatsnew notes --- doc/source/whatsnew/v1.0.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4e8a471239610..ed78796152f49 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -409,6 +409,9 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) - Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`) - Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) +- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`) +- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`) +- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`) - Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`) - Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) From 4c6c4b57b77d2980586a0959f041d316778488ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 15:03:36 -0800 Subject: [PATCH 2/6] TST: tests no longer shared with sparse --- pandas/tests/arrays/test_datetimelike.py | 4 +- pandas/tests/frame/test_api.py | 103 ++++++++--------------- pandas/tests/series/test_api.py | 66 ++++++--------- 3 files changed, 60 insertions(+), 113 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 5cab0c1fe6d59..84b6d45b78fe8 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -40,8 +40,8 @@ def datetime_index(request): """ freqstr = request.param # TODO: non-monotone indexes; NaTs, different start dates, timezones - pi = pd.date_range(start=pd.Timestamp("2000-01-01"), periods=100, freq=freqstr) - return pi + dti = pd.date_range(start=pd.Timestamp("2000-01-01"), periods=100, freq=freqstr) + return dti @pytest.fixture diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index a86e1dfe8353c..60befe5e73d37 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -6,35 +6,11 @@ import pytest import pandas as pd -from pandas import ( - Categorical, - DataFrame, - Series, - SparseDtype, - compat, - date_range, - timedelta_range, -) +from pandas import Categorical, DataFrame, Series, compat, date_range, timedelta_range import pandas.util.testing as tm -class SharedWithSparse: - """ - A collection of tests DataFrame and SparseDataFrame can share. - - In generic tests on this class, use ``self._assert_frame_equal()`` and - ``self._assert_series_equal()`` which are implemented in sub-classes - and dispatch correctly. - """ - - def _assert_frame_equal(self, left, right): - """Dispatch to frame class dependent assertion""" - raise NotImplementedError - - def _assert_series_equal(self, left, right): - """Dispatch to series class dependent assertion""" - raise NotImplementedError - +class TestDataFrameMisc: def test_copy_index_name_checking(self, float_frame): # don't want to be able to modify the index stored elsewhere after # making a copy @@ -141,16 +117,16 @@ def test_tab_completion(self): def test_not_hashable(self): empty_frame = DataFrame() - df = self.klass([1]) - msg = "'(Sparse)?DataFrame' objects are mutable, thus they cannot be hashed" + df = DataFrame([1]) + msg = "'DataFrame' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(df) with pytest.raises(TypeError, match=msg): hash(empty_frame) def test_new_empty_index(self): - df1 = self.klass(np.random.randn(0, 3)) - df2 = self.klass(np.random.randn(0, 3)) + df1 = DataFrame(np.random.randn(0, 3)) + df2 = DataFrame(np.random.randn(0, 3)) df1.index.name = "foo" assert df2.index.name is None @@ -161,7 +137,7 @@ def test_array_interface(self, float_frame): assert result.index is float_frame.index assert result.columns is float_frame.columns - self._assert_frame_equal(result, float_frame.apply(np.sqrt)) + tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) def test_get_agg_axis(self, float_frame): cols = float_frame._get_agg_axis(0) @@ -187,9 +163,9 @@ def test_nonzero(self, float_frame, float_string_frame): assert not df.empty def test_iteritems(self): - df = self.klass([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) for k, v in df.items(): - assert isinstance(v, self.klass._constructor_sliced) + assert isinstance(v, DataFrame._constructor_sliced) def test_items(self): # GH 17213, GH 13918 @@ -206,15 +182,15 @@ def test_iter(self, float_frame): def test_iterrows(self, float_frame, float_string_frame): for k, v in float_frame.iterrows(): exp = float_frame.loc[k] - self._assert_series_equal(v, exp) + tm.assert_series_equal(v, exp) for k, v in float_string_frame.iterrows(): exp = float_string_frame.loc[k] - self._assert_series_equal(v, exp) + tm.assert_series_equal(v, exp) def test_iterrows_iso8601(self): # GH 19671 - s = self.klass( + s = DataFrame( { "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], "iso8601": date_range("2000-01-01", periods=4, freq="M"), @@ -222,7 +198,7 @@ def test_iterrows_iso8601(self): ) for k, v in s.iterrows(): exp = s.loc[k] - self._assert_series_equal(v, exp) + tm.assert_series_equal(v, exp) def test_iterrows_corner(self): # gh-12222 @@ -248,19 +224,19 @@ def test_iterrows_corner(self): def test_itertuples(self, float_frame): for i, tup in enumerate(float_frame.itertuples()): - s = self.klass._constructor_sliced(tup[1:]) + s = DataFrame._constructor_sliced(tup[1:]) s.name = tup[0] expected = float_frame.iloc[i, :].reset_index(drop=True) - self._assert_series_equal(s, expected) + tm.assert_series_equal(s, expected) - df = self.klass( + df = DataFrame( {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] ) for tup in df.itertuples(index=False): assert isinstance(tup[1], int) - df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[["a", "a"]] assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)] @@ -315,7 +291,7 @@ def test_sequence_like_with_categorical(self): def test_len(self, float_frame): assert len(float_frame) == len(float_frame.index) - def test_values(self, float_frame, float_string_frame): + def test_values_mixed_dtypes(self, float_frame, float_string_frame): frame = float_frame arr = frame.values @@ -332,7 +308,7 @@ def test_values(self, float_frame, float_string_frame): arr = float_string_frame[["foo", "A"]].values assert arr[0, 0] == "bar" - df = self.klass({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) + df = DataFrame({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) arr = df.values assert arr[0, 0] == 1j @@ -372,17 +348,17 @@ def test_transpose(self, float_frame): # mixed type index, data = tm.getMixedTypeDict() - mixed = self.klass(data, index=index) + mixed = DataFrame(data, index=index) mixed_T = mixed.T for col, s in mixed_T.items(): assert s.dtype == np.object_ def test_swapaxes(self): - df = self.klass(np.random.randn(10, 5)) - self._assert_frame_equal(df.T, df.swapaxes(0, 1)) - self._assert_frame_equal(df.T, df.swapaxes(1, 0)) - self._assert_frame_equal(df, df.swapaxes(0, 0)) + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) + tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) + tm.assert_frame_equal(df, df.swapaxes(0, 0)) msg = ( "No axis named 2 for object type" r" " @@ -413,7 +389,7 @@ def test_more_values(self, float_string_frame): assert values.shape[1] == len(float_string_frame.columns) def test_repr_with_mi_nat(self, float_string_frame): - df = self.klass( + df = DataFrame( {"X": [1, 2]}, index=[[pd.NaT, pd.Timestamp("20130101")], ["a", "b"]] ) result = repr(df) @@ -430,18 +406,18 @@ def test_series_put_names(self, float_string_frame): assert v.name == k def test_empty_nonzero(self): - df = self.klass([1, 2, 3]) + df = DataFrame([1, 2, 3]) assert not df.empty - df = self.klass(index=[1], columns=[1]) + df = DataFrame(index=[1], columns=[1]) assert not df.empty - df = self.klass(index=["a", "b"], columns=["c", "d"]).dropna() + df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna() assert df.empty assert df.T.empty empty_frames = [ - self.klass(), - self.klass(index=[1]), - self.klass(columns=[1]), - self.klass({1: []}), + DataFrame(), + DataFrame(index=[1]), + DataFrame(columns=[1]), + DataFrame({1: []}), ] for df in empty_frames: assert df.empty @@ -449,7 +425,7 @@ def test_empty_nonzero(self): def test_with_datetimelikes(self): - df = self.klass( + df = DataFrame( { "A": date_range("20130101", periods=10), "B": timedelta_range("1 day", periods=10), @@ -458,20 +434,9 @@ def test_with_datetimelikes(self): t = df.T result = t.dtypes.value_counts() - if self.klass is DataFrame: - expected = Series({np.dtype("object"): 10}) - else: - expected = Series({SparseDtype(dtype=object): 10}) + expected = Series({np.dtype("object"): 10}) tm.assert_series_equal(result, expected) - -class TestDataFrameMisc(SharedWithSparse): - - klass = DataFrame - # SharedWithSparse tests use generic, klass-agnostic assertion - _assert_frame_equal = staticmethod(tm.assert_frame_equal) - _assert_series_equal = staticmethod(tm.assert_series_equal) - def test_values(self, float_frame): float_frame.values[:, 0] = 5.0 assert (float_frame.values[:, 0] == 5).all() diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1e4757ffecb5d..42b2c37638c76 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -24,18 +24,7 @@ import pandas.io.formats.printing as printing -class SharedWithSparse: - """ - A collection of tests Series and SparseSeries can share. - - In generic tests on this class, use ``self._assert_series_equal()`` - which is implemented in sub-classes. - """ - - def _assert_series_equal(self, left, right): - """Dispatch to series class dependent assertion""" - raise NotImplementedError - +class TestSeriesMisc: def test_scalarop_preserve_name(self, datetime_series): result = datetime_series * 2 assert result.name == datetime_series.name @@ -132,19 +121,19 @@ def test_sort_index_name(self, datetime_series): def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} - result = self.series_klass(d) - expected = self.series_klass(d, index=sorted(d.keys())) - self._assert_series_equal(result, expected) + result = Series(d) + expected = Series(d, index=sorted(d.keys())) + tm.assert_series_equal(result, expected) - result = self.series_klass(d, index=["b", "c", "d", "a"]) - expected = self.series_klass([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) - self._assert_series_equal(result, expected) + result = Series(d, index=["b", "c", "d", "a"]) + expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) + tm.assert_series_equal(result, expected) def test_constructor_subclass_dict(self): data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) - series = self.series_klass(data) - expected = self.series_klass(dict(data.items())) - self._assert_series_equal(series, expected) + series = Series(data) + expected = Series(dict(data.items())) + tm.assert_series_equal(series, expected) def test_constructor_ordereddict(self): # GH3283 @@ -152,44 +141,44 @@ def test_constructor_ordereddict(self): ("col{i}".format(i=i), np.random.random()) for i in range(12) ) - series = self.series_klass(data) - expected = self.series_klass(list(data.values()), list(data.keys())) - self._assert_series_equal(series, expected) + series = Series(data) + expected = Series(list(data.values()), list(data.keys())) + tm.assert_series_equal(series, expected) # Test with subclass class A(OrderedDict): pass - series = self.series_klass(A(data)) - self._assert_series_equal(series, expected) + series = Series(A(data)) + tm.assert_series_equal(series, expected) def test_constructor_dict_multiindex(self): d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} _d = sorted(d.items()) - result = self.series_klass(d) - expected = self.series_klass( + result = Series(d) + expected = Series( [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d]) ) - self._assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) d["z"] = 111.0 _d.insert(0, ("z", d["z"])) - result = self.series_klass(d) - expected = self.series_klass( + result = Series(d) + expected = Series( [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False) ) result = result.reindex(index=expected.index) - self._assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_constructor_dict_timedelta_index(self): # GH #12169 : Resample category data with timedelta index # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data - expected = self.series_klass( + expected = Series( data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") ) - result = self.series_klass( + result = Series( data={ pd.to_timedelta(0, unit="s"): "A", pd.to_timedelta(10, unit="s"): "B", @@ -197,20 +186,13 @@ def test_constructor_dict_timedelta_index(self): }, index=pd.to_timedelta([0, 10, 20], unit="s"), ) - self._assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_sparse_accessor_updates_on_inplace(self): s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]") s.drop([0, 1], inplace=True) assert s.sparse.density == 1.0 - -class TestSeriesMisc(SharedWithSparse): - - series_klass = Series - # SharedWithSparse tests use generic, series_klass-agnostic assertion - _assert_series_equal = staticmethod(tm.assert_series_equal) - def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) From 4144d3d7030a2033aa98da14fc3a7fa663011465 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Dec 2019 08:29:33 -0800 Subject: [PATCH 3/6] check earlier --- pandas/core/computation/eval.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 6dc110e3f8d07..8e2f082a0daf6 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -167,7 +167,7 @@ def _check_for_locals(expr: str, stack_level: int, parser: str): def eval( expr, - parser="pandas", + parser: str = "pandas", engine=None, truediv=_no_default, local_dict=None, @@ -306,6 +306,9 @@ def eval( "multi-line expressions are only valid in the " "context of data, use DataFrame.eval" ) + engine = _check_engine(engine) + _check_parser(parser) + _check_resolvers(resolvers) ret = None first_expr = True @@ -313,9 +316,6 @@ def eval( for expr in exprs: expr = _convert_expression(expr) - engine = _check_engine(engine) - _check_parser(parser) - _check_resolvers(resolvers) _check_for_locals(expr, level, parser) # get our (possibly passed-in) scope From 9c7da9c4fdacccffc48f8d85b3c7730992ebee27 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Dec 2019 09:27:29 -0800 Subject: [PATCH 4/6] Revert pieces broken off into 30006 and 30007 --- doc/source/whatsnew/v1.0.0.rst | 3 - pandas/tests/frame/test_api.py | 103 +++++++++++++++++++++----------- pandas/tests/series/test_api.py | 66 ++++++++++++-------- 3 files changed, 111 insertions(+), 61 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e533ebb0d0084..470209a7f4a33 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -506,9 +506,6 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) - Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`) - Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) -- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`) -- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`) -- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`) - Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`) - Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 60befe5e73d37..a86e1dfe8353c 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -6,11 +6,35 @@ import pytest import pandas as pd -from pandas import Categorical, DataFrame, Series, compat, date_range, timedelta_range +from pandas import ( + Categorical, + DataFrame, + Series, + SparseDtype, + compat, + date_range, + timedelta_range, +) import pandas.util.testing as tm -class TestDataFrameMisc: +class SharedWithSparse: + """ + A collection of tests DataFrame and SparseDataFrame can share. + + In generic tests on this class, use ``self._assert_frame_equal()`` and + ``self._assert_series_equal()`` which are implemented in sub-classes + and dispatch correctly. + """ + + def _assert_frame_equal(self, left, right): + """Dispatch to frame class dependent assertion""" + raise NotImplementedError + + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError + def test_copy_index_name_checking(self, float_frame): # don't want to be able to modify the index stored elsewhere after # making a copy @@ -117,16 +141,16 @@ def test_tab_completion(self): def test_not_hashable(self): empty_frame = DataFrame() - df = DataFrame([1]) - msg = "'DataFrame' objects are mutable, thus they cannot be hashed" + df = self.klass([1]) + msg = "'(Sparse)?DataFrame' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(df) with pytest.raises(TypeError, match=msg): hash(empty_frame) def test_new_empty_index(self): - df1 = DataFrame(np.random.randn(0, 3)) - df2 = DataFrame(np.random.randn(0, 3)) + df1 = self.klass(np.random.randn(0, 3)) + df2 = self.klass(np.random.randn(0, 3)) df1.index.name = "foo" assert df2.index.name is None @@ -137,7 +161,7 @@ def test_array_interface(self, float_frame): assert result.index is float_frame.index assert result.columns is float_frame.columns - tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) + self._assert_frame_equal(result, float_frame.apply(np.sqrt)) def test_get_agg_axis(self, float_frame): cols = float_frame._get_agg_axis(0) @@ -163,9 +187,9 @@ def test_nonzero(self, float_frame, float_string_frame): assert not df.empty def test_iteritems(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + df = self.klass([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) for k, v in df.items(): - assert isinstance(v, DataFrame._constructor_sliced) + assert isinstance(v, self.klass._constructor_sliced) def test_items(self): # GH 17213, GH 13918 @@ -182,15 +206,15 @@ def test_iter(self, float_frame): def test_iterrows(self, float_frame, float_string_frame): for k, v in float_frame.iterrows(): exp = float_frame.loc[k] - tm.assert_series_equal(v, exp) + self._assert_series_equal(v, exp) for k, v in float_string_frame.iterrows(): exp = float_string_frame.loc[k] - tm.assert_series_equal(v, exp) + self._assert_series_equal(v, exp) def test_iterrows_iso8601(self): # GH 19671 - s = DataFrame( + s = self.klass( { "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], "iso8601": date_range("2000-01-01", periods=4, freq="M"), @@ -198,7 +222,7 @@ def test_iterrows_iso8601(self): ) for k, v in s.iterrows(): exp = s.loc[k] - tm.assert_series_equal(v, exp) + self._assert_series_equal(v, exp) def test_iterrows_corner(self): # gh-12222 @@ -224,19 +248,19 @@ def test_iterrows_corner(self): def test_itertuples(self, float_frame): for i, tup in enumerate(float_frame.itertuples()): - s = DataFrame._constructor_sliced(tup[1:]) + s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] expected = float_frame.iloc[i, :].reset_index(drop=True) - tm.assert_series_equal(s, expected) + self._assert_series_equal(s, expected) - df = DataFrame( + df = self.klass( {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] ) for tup in df.itertuples(index=False): assert isinstance(tup[1], int) - df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[["a", "a"]] assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)] @@ -291,7 +315,7 @@ def test_sequence_like_with_categorical(self): def test_len(self, float_frame): assert len(float_frame) == len(float_frame.index) - def test_values_mixed_dtypes(self, float_frame, float_string_frame): + def test_values(self, float_frame, float_string_frame): frame = float_frame arr = frame.values @@ -308,7 +332,7 @@ def test_values_mixed_dtypes(self, float_frame, float_string_frame): arr = float_string_frame[["foo", "A"]].values assert arr[0, 0] == "bar" - df = DataFrame({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) + df = self.klass({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) arr = df.values assert arr[0, 0] == 1j @@ -348,17 +372,17 @@ def test_transpose(self, float_frame): # mixed type index, data = tm.getMixedTypeDict() - mixed = DataFrame(data, index=index) + mixed = self.klass(data, index=index) mixed_T = mixed.T for col, s in mixed_T.items(): assert s.dtype == np.object_ def test_swapaxes(self): - df = DataFrame(np.random.randn(10, 5)) - tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) - tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) - tm.assert_frame_equal(df, df.swapaxes(0, 0)) + df = self.klass(np.random.randn(10, 5)) + self._assert_frame_equal(df.T, df.swapaxes(0, 1)) + self._assert_frame_equal(df.T, df.swapaxes(1, 0)) + self._assert_frame_equal(df, df.swapaxes(0, 0)) msg = ( "No axis named 2 for object type" r" " @@ -389,7 +413,7 @@ def test_more_values(self, float_string_frame): assert values.shape[1] == len(float_string_frame.columns) def test_repr_with_mi_nat(self, float_string_frame): - df = DataFrame( + df = self.klass( {"X": [1, 2]}, index=[[pd.NaT, pd.Timestamp("20130101")], ["a", "b"]] ) result = repr(df) @@ -406,18 +430,18 @@ def test_series_put_names(self, float_string_frame): assert v.name == k def test_empty_nonzero(self): - df = DataFrame([1, 2, 3]) + df = self.klass([1, 2, 3]) assert not df.empty - df = DataFrame(index=[1], columns=[1]) + df = self.klass(index=[1], columns=[1]) assert not df.empty - df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna() + df = self.klass(index=["a", "b"], columns=["c", "d"]).dropna() assert df.empty assert df.T.empty empty_frames = [ - DataFrame(), - DataFrame(index=[1]), - DataFrame(columns=[1]), - DataFrame({1: []}), + self.klass(), + self.klass(index=[1]), + self.klass(columns=[1]), + self.klass({1: []}), ] for df in empty_frames: assert df.empty @@ -425,7 +449,7 @@ def test_empty_nonzero(self): def test_with_datetimelikes(self): - df = DataFrame( + df = self.klass( { "A": date_range("20130101", periods=10), "B": timedelta_range("1 day", periods=10), @@ -434,9 +458,20 @@ def test_with_datetimelikes(self): t = df.T result = t.dtypes.value_counts() - expected = Series({np.dtype("object"): 10}) + if self.klass is DataFrame: + expected = Series({np.dtype("object"): 10}) + else: + expected = Series({SparseDtype(dtype=object): 10}) tm.assert_series_equal(result, expected) + +class TestDataFrameMisc(SharedWithSparse): + + klass = DataFrame + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(tm.assert_frame_equal) + _assert_series_equal = staticmethod(tm.assert_series_equal) + def test_values(self, float_frame): float_frame.values[:, 0] = 5.0 assert (float_frame.values[:, 0] == 5).all() diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 42b2c37638c76..1e4757ffecb5d 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -24,7 +24,18 @@ import pandas.io.formats.printing as printing -class TestSeriesMisc: +class SharedWithSparse: + """ + A collection of tests Series and SparseSeries can share. + + In generic tests on this class, use ``self._assert_series_equal()`` + which is implemented in sub-classes. + """ + + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError + def test_scalarop_preserve_name(self, datetime_series): result = datetime_series * 2 assert result.name == datetime_series.name @@ -121,19 +132,19 @@ def test_sort_index_name(self, datetime_series): def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} - result = Series(d) - expected = Series(d, index=sorted(d.keys())) - tm.assert_series_equal(result, expected) + result = self.series_klass(d) + expected = self.series_klass(d, index=sorted(d.keys())) + self._assert_series_equal(result, expected) - result = Series(d, index=["b", "c", "d", "a"]) - expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) - tm.assert_series_equal(result, expected) + result = self.series_klass(d, index=["b", "c", "d", "a"]) + expected = self.series_klass([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) + self._assert_series_equal(result, expected) def test_constructor_subclass_dict(self): data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) - series = Series(data) - expected = Series(dict(data.items())) - tm.assert_series_equal(series, expected) + series = self.series_klass(data) + expected = self.series_klass(dict(data.items())) + self._assert_series_equal(series, expected) def test_constructor_ordereddict(self): # GH3283 @@ -141,44 +152,44 @@ def test_constructor_ordereddict(self): ("col{i}".format(i=i), np.random.random()) for i in range(12) ) - series = Series(data) - expected = Series(list(data.values()), list(data.keys())) - tm.assert_series_equal(series, expected) + series = self.series_klass(data) + expected = self.series_klass(list(data.values()), list(data.keys())) + self._assert_series_equal(series, expected) # Test with subclass class A(OrderedDict): pass - series = Series(A(data)) - tm.assert_series_equal(series, expected) + series = self.series_klass(A(data)) + self._assert_series_equal(series, expected) def test_constructor_dict_multiindex(self): d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} _d = sorted(d.items()) - result = Series(d) - expected = Series( + result = self.series_klass(d) + expected = self.series_klass( [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d]) ) - tm.assert_series_equal(result, expected) + self._assert_series_equal(result, expected) d["z"] = 111.0 _d.insert(0, ("z", d["z"])) - result = Series(d) - expected = Series( + result = self.series_klass(d) + expected = self.series_klass( [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False) ) result = result.reindex(index=expected.index) - tm.assert_series_equal(result, expected) + self._assert_series_equal(result, expected) def test_constructor_dict_timedelta_index(self): # GH #12169 : Resample category data with timedelta index # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data - expected = Series( + expected = self.series_klass( data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") ) - result = Series( + result = self.series_klass( data={ pd.to_timedelta(0, unit="s"): "A", pd.to_timedelta(10, unit="s"): "B", @@ -186,13 +197,20 @@ def test_constructor_dict_timedelta_index(self): }, index=pd.to_timedelta([0, 10, 20], unit="s"), ) - tm.assert_series_equal(result, expected) + self._assert_series_equal(result, expected) def test_sparse_accessor_updates_on_inplace(self): s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]") s.drop([0, 1], inplace=True) assert s.sparse.density == 1.0 + +class TestSeriesMisc(SharedWithSparse): + + series_klass = Series + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_series_equal) + def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) From bbfff68bd1c15c3fed0acd7a7e3bfe39ea24d7d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Dec 2019 09:37:08 -0800 Subject: [PATCH 5/6] address leftover comments from #29986 --- pandas/tests/test_strings.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index cf52e286a47a5..89ede96dce18e 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2970,14 +2970,10 @@ def test_partition_sep_kwarg(self): # GH 22676; depr kwarg "pat" in favor of "sep" values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) - # str.partition - # using sep -> no warning expected = values.str.partition(sep="_") result = values.str.partition("_") tm.assert_frame_equal(result, expected) - # str.rpartition - # using sep -> no warning expected = values.str.rpartition(sep="_") result = values.str.rpartition("_") tm.assert_frame_equal(result, expected) From ef1124e976ad268f81182a9fd9727139eb3cf76f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Dec 2019 20:21:33 -0800 Subject: [PATCH 6/6] revert change that broke mypy --- pandas/core/computation/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 8e2f082a0daf6..2e5a563b815b3 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -167,7 +167,7 @@ def _check_for_locals(expr: str, stack_level: int, parser: str): def eval( expr, - parser: str = "pandas", + parser="pandas", engine=None, truediv=_no_default, local_dict=None,