From 2fe603d7eef4e6c8c7c5150d76e0377c26bd650d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 11:18:08 -0500 Subject: [PATCH 01/21] DEPR: get_dtype_values --- pandas/core/computation/expressions.py | 6 ++-- pandas/core/frame.py | 2 +- pandas/core/generic.py | 3 ++ pandas/tests/frame/test_api.py | 2 +- pandas/tests/frame/test_arithmetic.py | 6 ++-- pandas/tests/frame/test_block_internals.py | 8 +++-- pandas/tests/frame/test_combine_concat.py | 2 +- pandas/tests/frame/test_constructors.py | 36 +++++++++++----------- pandas/tests/frame/test_dtypes.py | 6 ++-- pandas/tests/frame/test_indexing.py | 10 +++--- pandas/tests/frame/test_missing.py | 4 +-- pandas/tests/frame/test_mutate_columns.py | 12 ++++++-- pandas/tests/frame/test_reshape.py | 8 ++--- pandas/tests/frame/test_timezones.py | 2 +- pandas/tests/groupby/test_apply.py | 3 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/io/pytables/test_pytables.py | 4 ++- pandas/tests/reshape/test_pivot.py | 4 +-- pandas/tests/reshape/test_reshape.py | 7 +++-- pandas/tests/series/test_arithmetic.py | 6 ++-- pandas/tests/series/test_dtypes.py | 2 +- pandas/tests/sparse/frame/test_frame.py | 4 +-- 22 files changed, 80 insertions(+), 59 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index f293b3b33e8d3..0b18bac390f0f 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -79,11 +79,11 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, 'get_dtype_counts'): - s = o.get_dtype_counts() + if hasattr(o, '_data'): + s = o._data.get_dtype_counts() if len(s) > 1: return False - dtypes |= set(s.index) + dtypes |= set(s.keys) elif isinstance(o, np.ndarray): dtypes |= {o.dtype.name} diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d7da653618b2f..f96f8a92ca6cf 100644 --- a/pandas/core/frame.py +++ 
b/pandas/core/frame.py @@ -2325,7 +2325,7 @@ def _sizeof_fmt(num, size_qualifier): else: _verbose_repr() - counts = self.get_dtype_counts() + counts = self._data.get_dtype_counts() dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k in sorted(counts.items())] lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 166d8526456fb..05598e0c8b0c7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5290,6 +5290,9 @@ def get_dtype_counts(self): object 1 dtype: int64 """ + warnings.warn("`get_dtype_counts` has been deprecated and will be " + "removed in a future version.", FutureWarning, + stacklevel=2) from pandas import Series return Series(self._data.get_dtype_counts()) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index ce841b302a037..1d7f3b0a79b94 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -433,7 +433,7 @@ def test_with_datetimelikes(self): 'B': timedelta_range('1 day', periods=10)}) t = df.T - result = t.get_dtype_counts() + result = Series(t._data.get_dtype_counts()) if self.klass is DataFrame: expected = Series({'object': 10}) else: diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 061e0d32e1f06..94d6c435a1b60 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -273,7 +273,7 @@ def test_df_flex_cmp_constant_return_types(self, opname): df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 - result = getattr(df, opname)(const).get_dtype_counts() + result = pd.Series(getattr(df, opname)(const)._data.get_dtype_counts()) tm.assert_series_equal(result, pd.Series([2], ['bool'])) @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) @@ -283,7 +283,9 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): const = 2 empty = df.iloc[:0] - result = getattr(empty, 
opname)(const).get_dtype_counts() + result = pd.Series( + getattr(empty, opname)(const)._data.get_dtype_counts() + ) tm.assert_series_equal(result, pd.Series([2], ['bool'])) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 6fbc884829784..d6dc3d6db8fb7 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -217,7 +217,7 @@ def test_construction_with_mixed(self, float_string_frame): df = DataFrame(data) # check dtypes - result = df.get_dtype_counts().sort_values() + result = df.dtypes expected = Series({'datetime64[ns]': 3}) # mixed-type frames @@ -225,7 +225,9 @@ def test_construction_with_mixed(self, float_string_frame): float_string_frame['timedelta'] = timedelta(days=1, seconds=1) assert float_string_frame['datetime'].dtype == 'M8[ns]' assert float_string_frame['timedelta'].dtype == 'm8[ns]' - result = float_string_frame.get_dtype_counts().sort_values() + result = Series( + float_string_frame._data.get_dtype_counts() + ).sort_values() expected = Series({'float64': 4, 'object': 1, 'datetime64[ns]': 1, @@ -409,7 +411,7 @@ def test_get_numeric_data(self): df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'f': Timestamp('20010102')}, index=np.arange(10)) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 1, 'float64': 1, datetime64name: 1, objectname: 1}) result = result.sort_index() diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index faa86acb1584f..ad45604cd406c 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -17,7 +17,7 @@ def test_concat_multiple_frames_dtypes(self): A = DataFrame(data=np.ones((10, 2)), columns=[ 'foo', 'bar'], dtype=np.float64) B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) - results = pd.concat((A, B), axis=1).get_dtype_counts() + results = Series(pd.concat((A, B), 
axis=1)._data.get_dtype_counts()) expected = Series(dict(float64=2, float32=2)) assert_series_equal(results, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 981dc8b32b8cc..7d6e7123fb04e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1579,7 +1579,7 @@ def test_constructor_with_datetimes(self): 'D': Timestamp("20010101"), 'E': datetime(2001, 1, 2, 0, 0)}, index=np.arange(10)) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 1, datetime64name: 2, objectname: 2}) result.sort_index() expected.sort_index() @@ -1591,7 +1591,7 @@ def test_constructor_with_datetimes(self): floatname: np.array(1., dtype=floatname), intname: np.array(1, dtype=intname)}, index=np.arange(10)) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = {objectname: 1} if intname == 'int64': expected['int64'] = 2 @@ -1613,7 +1613,7 @@ def test_constructor_with_datetimes(self): floatname: np.array([1.] 
* 10, dtype=floatname), intname: np.array([1] * 10, dtype=intname)}, index=np.arange(10)) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) result = result.sort_index() tm.assert_series_equal(result, expected) @@ -1623,7 +1623,7 @@ def test_constructor_with_datetimes(self): datetime_s = Series(datetimes) assert datetime_s.dtype == 'M8[ns]' df = DataFrame({'datetime_s': datetime_s}) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({datetime64name: 1}) result = result.sort_index() expected = expected.sort_index() @@ -1634,7 +1634,7 @@ def test_constructor_with_datetimes(self): datetimes = [ts.to_pydatetime() for ts in ind] dates = [ts.date() for ts in ind] df = DataFrame({'datetimes': datetimes, 'dates': dates}) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({datetime64name: 1, objectname: 1}) result = result.sort_index() expected = expected.sort_index() @@ -1693,7 +1693,7 @@ def test_constructor_datetimes_with_nulls(self): for arr in [np.array([None, None, None, None, datetime.now(), None]), np.array([None, None, datetime.now(), None])]: - result = DataFrame(arr).get_dtype_counts() + result = Series(DataFrame(arr)._data.get_dtype_counts()) expected = Series({'datetime64[ns]': 1}) tm.assert_series_equal(result, expected) @@ -1706,49 +1706,49 @@ def test_constructor_for_list_with_dtypes(self): # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)]) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 5}) df = DataFrame([np.array(np.arange(5), dtype='int32') for x in range(5)]) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int32': 5}) # overflow issue? 
(we always expecte int64 upcasting here) df = DataFrame({'a': [2 ** 31, 2 ** 31 + 1]}) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 1}) tm.assert_series_equal(result, expected) # GH #2751 (construction with no index specified), make sure we cast to # platform values df = DataFrame([1, 2]) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 1}) tm.assert_series_equal(result, expected) df = DataFrame([1., 2.]) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'float64': 1}) tm.assert_series_equal(result, expected) df = DataFrame({'a': [1, 2]}) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 1}) tm.assert_series_equal(result, expected) df = DataFrame({'a': [1., 2.]}) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'float64': 1}) tm.assert_series_equal(result, expected) df = DataFrame({'a': 1}, index=range(3)) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 1}) tm.assert_series_equal(result, expected) df = DataFrame({'a': 1.}, index=range(3)) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'float64': 1}) tm.assert_series_equal(result, expected) @@ -1757,7 +1757,7 @@ def test_constructor_for_list_with_dtypes(self): 'c': list('abcd'), 'd': [datetime(2000, 1, 1) for i in range(4)], 'e': [1., 2, 4., 7]}) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series( {'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1}) result = result.sort_index() @@ -2077,14 +2077,14 @@ def test_from_records_misc_brokenness(self): rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 'hi']) # test col upconverts to obj df2_obj = 
DataFrame.from_records(rows, columns=['date', 'test']) - results = df2_obj.get_dtype_counts() + results = Series(df2_obj._data.get_dtype_counts()) expected = Series({'datetime64[ns]': 1, 'object': 1}) rows = [] rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 1]) df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = df2_obj.get_dtype_counts().sort_index() + results = Series(df2_obj._data.get_dtype_counts()).sort_index() expected = Series({'datetime64[ns]': 1, 'int64': 1}) tm.assert_series_equal(results, expected) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index f68770d796292..dad2e5db14d24 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -836,7 +836,7 @@ def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), B=Series([timedelta(days=i) for i in range(3)]))) - result = df.get_dtype_counts().sort_index() + result = Series(df._data.get_dtype_counts()).sort_index() expected = Series( {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_index() assert_series_equal(result, expected) @@ -844,7 +844,7 @@ def test_timedeltas(self): df['C'] = df['A'] + df['B'] expected = Series( {'datetime64[ns]': 2, 'timedelta64[ns]': 1}).sort_values() - result = df.get_dtype_counts().sort_values() + result = Series(df._data.get_dtype_counts()).sort_values() assert_series_equal(result, expected) # mixed int types @@ -852,7 +852,7 @@ def test_timedeltas(self): expected = Series({'datetime64[ns]': 2, 'timedelta64[ns]': 1, 'int64': 1}).sort_values() - result = df.get_dtype_counts().sort_values() + result = Series(df._data.get_dtype_counts()).sort_values() assert_series_equal(result, expected) def test_arg_for_errors_in_astype(self): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 4c1abfb1a7f6f..ef589a125dee6 100644 --- a/pandas/tests/frame/test_indexing.py +++ 
b/pandas/tests/frame/test_indexing.py @@ -300,14 +300,14 @@ def test_getitem_boolean_casting(self, datetime_frame): df['F1'] = df['F'].copy() casted = df[df > 0] - result = casted.get_dtype_counts() + result = Series(casted._data.get_dtype_counts()) expected = Series({'float64': 4, 'int32': 2, 'int64': 2}) assert_series_equal(result, expected) # int block splitting df.loc[df.index[1:3], ['E1', 'F1']] = 0 casted = df[df > 0] - result = casted.get_dtype_counts() + result = Series(casted._data.get_dtype_counts()) expected = Series({'float64': 6, 'int32': 1, 'int64': 1}) assert_series_equal(result, expected) @@ -615,7 +615,7 @@ def test_setitem_cast(self, float_frame): df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC')) df['event'] = np.nan df.loc[10, 'event'] = 'foo' - result = df.get_dtype_counts().sort_values() + result = Series(df._data.get_dtype_counts()).sort_values() expected = Series({'float64': 3, 'object': 1}).sort_values() assert_series_equal(result, expected) @@ -1614,7 +1614,7 @@ def test_setitem_single_column_mixed_datetime(self): df['timestamp'] = Timestamp('20010102') # check our dtypes - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'float64': 3, 'datetime64[ns]': 1}) assert_series_equal(result, expected) @@ -2637,7 +2637,7 @@ def _check_get(df, cond, check_dtypes=True): for c in ['float32', 'float64', 'int32', 'int64']}) df.iloc[1, :] = 0 - result = df.where(df >= 0).get_dtype_counts() + result = Series(df.where(df >= 0)._data.get_dtype_counts()) # when we don't preserve boolean casts # diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index c72951ac4cdfa..dd6365091b733 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -407,13 +407,13 @@ def test_fillna_downcast(self): def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) 
- result = df.get_dtype_counts().sort_values() + result = Series(df._data.get_dtype_counts().sort_values()) expected = Series({'object': 5}) assert_series_equal(result, expected) result = df.fillna(1) expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) - result = result.get_dtype_counts().sort_values() + result = Series(result._data.get_dtype_counts()).sort_values() expected = Series({'int64': 5}) assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index dc2ac5f728ec7..58b291d84112d 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -159,16 +159,22 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') result = Series(dict(float32=1, float64=5)) - assert (df.get_dtype_counts().sort_index() == result).all() + assert ( + Series(df._data.get_dtype_counts()).sort_index() == result + ).all() # replacing current (in different block) df['a'] = df['a'].astype('float32') result = Series(dict(float32=2, float64=4)) - assert (df.get_dtype_counts().sort_index() == result).all() + assert ( + Series(df._data.get_dtype_counts()).sort_index() == result + ).all() df['y'] = df['a'].astype('int32') result = Series(dict(float32=2, float64=4, int32=1)) - assert (df.get_dtype_counts().sort_index() == result).all() + assert ( + Series(df._data.get_dtype_counts()).sort_index() == result + ).all() with pytest.raises(ValueError, match='already exists'): df.insert(1, 'a', df['b']) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index a3b9e529431e5..cbe815adf5da6 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -502,14 +502,14 @@ def test_unstack_dtypes(self): [2, 2, 3, 4]] df = DataFrame(rows, columns=list('ABCD')) - result = df.get_dtype_counts() + result = Series(df._data.get_dtype_counts()) expected = Series({'int64': 4}) 
assert_series_equal(result, expected) # single dtype df2 = df.set_index(['A', 'B']) df3 = df2.unstack('B') - result = df3.get_dtype_counts() + result = Series(df3._data.get_dtype_counts()) expected = Series({'int64': 4}) assert_series_equal(result, expected) @@ -517,13 +517,13 @@ def test_unstack_dtypes(self): df2 = df.set_index(['A', 'B']) df2['C'] = 3. df3 = df2.unstack('B') - result = df3.get_dtype_counts() + result = Series(df3._data.get_dtype_counts()) expected = Series({'int64': 2, 'float64': 2}) assert_series_equal(result, expected) df2['D'] = 'foo' df3 = df2.unstack('B') - result = df3.get_dtype_counts() + result = Series(df3._data.get_dtype_counts()) expected = Series({'float64': 2, 'object': 2}) assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 5b2f846eccdd5..3eae831a008a3 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -154,7 +154,7 @@ def test_frame_no_datetime64_dtype(self, tz): 'dr_tz': dr_tz, 'datetimes_naive': datetimes_naive, 'datetimes_with_tz': datetimes_with_tz}) - result = df.get_dtype_counts().sort_index() + result = Series(df._data.get_dtype_counts()).sort_index() expected = Series({'datetime64[ns]': 2, str(tz_expected): 2}).sort_index() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0fb8673e6274a..6158b507a8280 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -180,7 +180,8 @@ def test_apply_with_mixed_dtype(): df = DataFrame({'foo1': np.random.randn(6), 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) result = df.apply(lambda x: x, axis=1) - tm.assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) + tm.assert_series_equal(Series(df._data.get_dtype_counts()), + result.get_dtype_counts()) # GH 3610 incorrect dtype conversion with as_index=False df = DataFrame({"c1": 
[1, 2, 6, 6, 8]}) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index dcd0d3938c6a5..458aab7d11b24 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -88,7 +88,7 @@ def max_value(group): return group.loc[group['value'].idxmax()] applied = df.groupby('A').apply(max_value) - result = applied.get_dtype_counts().sort_values() + result = Series(applied._data.get_dtype_counts()).sort_values() expected = Series({'float64': 2, 'int64': 1, 'object': 2}).sort_values() diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 40cc05c317471..8a7514ee31017 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -1985,7 +1985,9 @@ def test_table_values_dtypes_roundtrip(self): df1['time2'] = Timestamp('20130102') store.append('df_mixed_dtypes1', df1) - result = store.select('df_mixed_dtypes1').get_dtype_counts() + result = Series( + store.select('df_mixed_dtypes1')._data.get_dtype_counts() + ) expected = Series({'float32': 2, 'float64': 1, 'int32': 1, 'bool': 1, 'int16': 1, 'int8': 1, 'int64': 1, 'object': 1, 'datetime64[ns]': 2}) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8543d2c2df7d6..29e684b00f8b8 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -245,7 +245,7 @@ def test_pivot_dtypes(self): z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.sum) - result = z.get_dtype_counts() + result = Series(z._data.get_dtype_counts()) expected = Series(dict(int64=2)) tm.assert_series_equal(result, expected) @@ -256,7 +256,7 @@ def test_pivot_dtypes(self): z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.mean) - result = z.get_dtype_counts() + result = Series(z._data.get_dtype_counts()) expected = Series(dict(float64=2)) 
tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 283814d2375b1..e96d99f858784 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -101,7 +101,8 @@ def test_basic_types(self, sparse, dtype): dtype_name = self.effective_dtype(dtype).name expected = Series({dtype_name: 8}) - tm.assert_series_equal(result.get_dtype_counts(), expected) + tm.assert_series_equal(Series(result._data.get_dtype_counts()), + expected) result = get_dummies(s_df, columns=['a'], sparse=sparse, dtype=dtype) @@ -109,7 +110,9 @@ def test_basic_types(self, sparse, dtype): expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) expected = Series(expected_counts).sort_index() - tm.assert_series_equal(result.get_dtype_counts().sort_index(), + tm.assert_series_equal(Series( + result._data.get_dtype_counts() + ).sort_index(), expected) def test_just_na(self, sparse): diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 43fcddea3d964..038a69864a0b5 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -123,7 +123,9 @@ def test_ser_flex_cmp_return_dtypes(self, opname): ser = Series([1, 3, 2], index=range(3)) const = 2 - result = getattr(ser, opname)(const).get_dtype_counts() + result = Series( + getattr(ser, opname)(const)._data.get_dtype_counts() + ) tm.assert_series_equal(result, Series([1], ['bool'])) @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) @@ -133,7 +135,7 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname): empty = ser.iloc[:0] const = 2 - result = getattr(empty, opname)(const).get_dtype_counts() + result = Series(getattr(empty, opname)(const)._data.get_dtype_counts()) tm.assert_series_equal(result, Series([1], ['bool'])) @pytest.mark.parametrize('op', [operator.eq, operator.ne, diff --git a/pandas/tests/series/test_dtypes.py 
b/pandas/tests/series/test_dtypes.py index b9146534d10f1..3db75718c7b17 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -56,7 +56,7 @@ def test_dtype(self, datetime_series): # GH 26705 - Assert .ftypes is deprecated with tm.assert_produces_warning(FutureWarning): assert datetime_series.ftypes == 'float64:dense' - tm.assert_series_equal(datetime_series.get_dtype_counts(), + tm.assert_series_equal(Series(datetime_series._data.get_dtype_counts()), Series(1, ['float64'])) # GH18243 - Assert .get_ftype_counts is deprecated with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 2d0b338ef53c0..81179959d3f13 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -293,7 +293,7 @@ def test_dtypes(self): df.loc[:9998] = np.nan sdf = df.to_sparse() - result = sdf.get_dtype_counts() + result = Series(sdf._data.get_dtype_counts()) expected = Series({'Sparse[float64, nan]': 4}) tm.assert_series_equal(result, expected) @@ -902,7 +902,7 @@ def test_corr(self, float_frame): def test_describe(self, float_frame): float_frame['foo'] = np.nan - float_frame.get_dtype_counts() + Series(float_frame._data.get_dtype_counts()) str(float_frame) desc = float_frame.describe() # noqa From 735a2fda7ed8928aa27c0c88c01fb8f410e66b26 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 11:50:17 -0500 Subject: [PATCH 02/21] Add test and whatsnew note: --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/tests/frame/test_missing.py | 2 +- pandas/tests/generic/test_generic.py | 6 ++++++ pandas/tests/groupby/test_apply.py | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 82e093bc2bd49..55204c5552901 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -616,6 +616,7 @@ Other deprecations - 
:attr:`Series.imag` and :attr:`Series.real` are deprecated. (:issue:`18262`) - :meth:`Series.put` is deprecated. (:issue:`18262`) - :meth:`Index.item` and :meth:`Series.item` is deprecated. (:issue:`18262`) +- :meth:`DataFrame.get_dtype_counts` is deprecated. (:issue:`18262`) .. _whatsnew_0250.prior_deprecations: diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index dd6365091b733..a6c5ea8f57d70 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -407,7 +407,7 @@ def test_fillna_downcast(self): def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) - result = Series(df._data.get_dtype_counts().sort_values()) + result = Series(df._data.get_dtype_counts()).sort_values() expected = Series({'object': 5}) assert_series_equal(result, expected) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index b1a083213debd..e8343a1cf318b 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -932,3 +932,9 @@ def test_deprecated_to_dense(self): with tm.assert_produces_warning(FutureWarning): result = ser.to_dense() tm.assert_series_equal(result, ser) + + def test_deprecated_get_dtype_counts(self): + # GH 18262 + df = DataFrame([1]) + with tm.assert_produces_warning(FutureWarning): + df.get_dtype_counts() diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 6158b507a8280..d71c2d5a346d6 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -181,7 +181,8 @@ def test_apply_with_mixed_dtype(): 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) result = df.apply(lambda x: x, axis=1) tm.assert_series_equal(Series(df._data.get_dtype_counts()), - result.get_dtype_counts()) + Series(result._data.get_dtype_counts()) + ) # GH 3610 incorrect dtype conversion with 
as_index=False df = DataFrame({"c1": [1, 2, 6, 6, 8]}) From 84738855821caa529240254fd92814ed84db41c7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 11:54:39 -0500 Subject: [PATCH 03/21] Flake8 --- pandas/tests/frame/test_mutate_columns.py | 18 +++++++++--------- pandas/tests/reshape/test_reshape.py | 10 ++++++---- pandas/tests/series/test_dtypes.py | 8 ++++++-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 58b291d84112d..ce6669c5a1725 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -159,22 +159,22 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') result = Series(dict(float32=1, float64=5)) - assert ( - Series(df._data.get_dtype_counts()).sort_index() == result - ).all() + assert (Series( + df._data.get_dtype_counts() + ).sort_index() == result).all() # replacing current (in different block) df['a'] = df['a'].astype('float32') result = Series(dict(float32=2, float64=4)) - assert ( - Series(df._data.get_dtype_counts()).sort_index() == result - ).all() + assert (Series( + df._data.get_dtype_counts() + ).sort_index() == result).all() df['y'] = df['a'].astype('int32') result = Series(dict(float32=2, float64=4, int32=1)) - assert ( - Series(df._data.get_dtype_counts()).sort_index() == result - ).all() + assert (Series( + df._data.get_dtype_counts() + ).sort_index() == result).all() with pytest.raises(ValueError, match='already exists'): df.insert(1, 'a', df['b']) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index e96d99f858784..94c69f0e947a7 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -110,10 +110,12 @@ def test_basic_types(self, sparse, dtype): expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) expected = Series(expected_counts).sort_index() - 
tm.assert_series_equal(Series( - result._data.get_dtype_counts() - ).sort_index(), - expected) + tm.assert_series_equal( + Series( + result._data.get_dtype_counts() + ).sort_index(), + expected + ) def test_just_na(self, sparse): just_na_list = [np.nan] diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 3db75718c7b17..5d24d799972c8 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -56,8 +56,12 @@ def test_dtype(self, datetime_series): # GH 26705 - Assert .ftypes is deprecated with tm.assert_produces_warning(FutureWarning): assert datetime_series.ftypes == 'float64:dense' - tm.assert_series_equal(Series(datetime_series._data.get_dtype_counts()), - Series(1, ['float64'])) + tm.assert_series_equal( + Series( + datetime_series._data.get_dtype_counts() + ), + Series(1, ['float64']) + ) # GH18243 - Assert .get_ftype_counts is deprecated with tm.assert_produces_warning(FutureWarning): tm.assert_series_equal(datetime_series.get_ftype_counts(), From faeb97292bbb9edb2e5c791ceaaf414adb637f15 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 15:30:23 -0500 Subject: [PATCH 04/21] Start converting tests to compare np dtypes --- pandas/tests/frame/test_api.py | 5 +++-- pandas/tests/frame/test_arithmetic.py | 10 ++++----- pandas/tests/frame/test_block_internals.py | 25 +++++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 1d7f3b0a79b94..1853e6b5d4a08 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -433,9 +433,10 @@ def test_with_datetimelikes(self): 'B': timedelta_range('1 day', periods=10)}) t = df.T - result = Series(t._data.get_dtype_counts()) + #result = Series(t._data.get_dtype_counts()) + result = t.dtypes.value_counts() if self.klass is DataFrame: - expected = Series({'object': 10}) + expected = Series({np.dtype('object'): 10}) else: 
expected = Series({'Sparse[object, nan]': 10}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 94d6c435a1b60..bcbea9d7a2236 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -273,8 +273,8 @@ def test_df_flex_cmp_constant_return_types(self, opname): df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 - result = pd.Series(getattr(df, opname)(const)._data.get_dtype_counts()) - tm.assert_series_equal(result, pd.Series([2], ['bool'])) + result = getattr(df, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types_empty(self, opname): @@ -283,10 +283,8 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): const = 2 empty = df.iloc[:0] - result = pd.Series( - getattr(empty, opname)(const)._data.get_dtype_counts() - ) - tm.assert_series_equal(result, pd.Series([2], ['bool'])) + result = getattr(empty, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) # ------------------------------------------------------------------- diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index d6dc3d6db8fb7..f1cbd7763474e 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -225,13 +225,13 @@ def test_construction_with_mixed(self, float_string_frame): float_string_frame['timedelta'] = timedelta(days=1, seconds=1) assert float_string_frame['datetime'].dtype == 'M8[ns]' assert float_string_frame['timedelta'].dtype == 'm8[ns]' - result = Series( - float_string_frame._data.get_dtype_counts() - ).sort_values() - expected = Series({'float64': 4, - 'object': 1, - 'datetime64[ns]': 1, - 'timedelta64[ns]': 
1}).sort_values() + result = float_string_frame.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('object'), + np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]')], + index=list('ABCD') + ['foo', 'datetime', + 'timedelta']) assert_series_equal(result, expected) def test_construction_with_conversions(self): @@ -411,11 +411,12 @@ def test_get_numeric_data(self): df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'f': Timestamp('20010102')}, index=np.arange(10)) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 1, 'float64': 1, - datetime64name: 1, objectname: 1}) - result = result.sort_index() - expected = expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('float64'), + np.dtype('int64'), + np.dtype(objectname), + np.dtype(datetime64name)], + index=['a', 'b', 'c', 'f']) assert_series_equal(result, expected) df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', From 95a7075f300be49592c00803c728a67a4b916e7d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 17:26:36 -0500 Subject: [PATCH 05/21] keys --> keys(), first pass at converting tests --- pandas/core/computation/expressions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 0b18bac390f0f..e40669ae69f6f 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -83,7 +83,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): s = o._data.get_dtype_counts() if len(s) > 1: return False - dtypes |= set(s.keys) + dtypes |= set(s.keys()) elif isinstance(o, np.ndarray): dtypes |= {o.dtype.name} From eb5213cb2e5845d30e171b70dc07e9fef7fbca81 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 20:29:14 -0500 Subject: [PATCH 06/21] Change more tests --- pandas/tests/frame/test_api.py | 1 - pandas/tests/frame/test_reshape.py | 30 +++++++++++++++++++--------- 
pandas/tests/frame/test_timezones.py | 11 +++++++--- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 1853e6b5d4a08..88cfd1a02c0e2 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -433,7 +433,6 @@ def test_with_datetimelikes(self): 'B': timedelta_range('1 day', periods=10)}) t = df.T - #result = Series(t._data.get_dtype_counts()) result = t.dtypes.value_counts() if self.klass is DataFrame: expected = Series({np.dtype('object'): 10}) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index cbe815adf5da6..ec9404677c318 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -502,29 +502,41 @@ def test_unstack_dtypes(self): [2, 2, 3, 4]] df = DataFrame(rows, columns=list('ABCD')) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 4}) + result = df.dtypes + expected = Series([np.dtype('int64')] * 4, + index=list('ABCD')) assert_series_equal(result, expected) # single dtype df2 = df.set_index(['A', 'B']) df3 = df2.unstack('B') - result = Series(df3._data.get_dtype_counts()) - expected = Series({'int64': 4}) + result = df3.dtypes + expected = Series([np.dtype('int64')] * 4, + index=pd.MultiIndex.from_arrays([ + ['C', 'C', 'D', 'D'], + [1, 2, 1, 2] + ], names=(None, 'B'))) assert_series_equal(result, expected) # mixed df2 = df.set_index(['A', 'B']) df2['C'] = 3. 
df3 = df2.unstack('B') - result = Series(df3._data.get_dtype_counts()) - expected = Series({'int64': 2, 'float64': 2}) + result = df3.dtypes + expected = Series([np.dtype('float64')] * 2 + [np.dtype('int64')] * 2, + index=pd.MultiIndex.from_arrays([ + ['C', 'C', 'D', 'D'], + [1, 2, 1, 2] + ], names=(None, 'B'))) assert_series_equal(result, expected) - df2['D'] = 'foo' df3 = df2.unstack('B') - result = Series(df3._data.get_dtype_counts()) - expected = Series({'float64': 2, 'object': 2}) + result = df3.dtypes + expected = Series([np.dtype('float64')] * 2 + [np.dtype('object')] * 2, + index=pd.MultiIndex.from_arrays([ + ['C', 'C', 'D', 'D'], + [1, 2, 1, 2] + ], names=(None, 'B'))) assert_series_equal(result, expected) # GH7405 diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 3eae831a008a3..1703fceeba9cf 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -154,9 +154,14 @@ def test_frame_no_datetime64_dtype(self, tz): 'dr_tz': dr_tz, 'datetimes_naive': datetimes_naive, 'datetimes_with_tz': datetimes_with_tz}) - result = Series(df._data.get_dtype_counts()).sort_index() - expected = Series({'datetime64[ns]': 2, - str(tz_expected): 2}).sort_index() + result = df.dtypes + expected = Series([ + np.dtype('datetime64[ns]'), + DatetimeTZDtype(tz=tz), + np.dtype('datetime64[ns]'), + DatetimeTZDtype(tz=tz) + ], + index=['dr', 'dr_tz', 'datetimes_naive', 'datetimes_with_tz']) tm.assert_series_equal(result, expected) @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) From 06af7fc539de2781bd1c3ef4dc07571ee6c4a938 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 20:37:58 -0500 Subject: [PATCH 07/21] Change more tests --- pandas/tests/frame/test_combine_concat.py | 5 +++-- pandas/tests/frame/test_mutate_columns.py | 27 +++++++++++++---------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py 
b/pandas/tests/frame/test_combine_concat.py index ad45604cd406c..290d5459dc26f 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -17,8 +17,9 @@ def test_concat_multiple_frames_dtypes(self): A = DataFrame(data=np.ones((10, 2)), columns=[ 'foo', 'bar'], dtype=np.float64) B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) - results = Series(pd.concat((A, B), axis=1)._data.get_dtype_counts()) - expected = Series(dict(float64=2, float32=2)) + results = pd.concat((A, B), axis=1).dtypes + expected = Series([np.dtype('float64')] * 2 + [np.dtype('float32')] * 2, + index=['foo', 'bar', 0, 1]) assert_series_equal(results, expected) @pytest.mark.parametrize('data', [ diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index ce6669c5a1725..ffc2a515bc4b7 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -158,23 +158,26 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') - result = Series(dict(float32=1, float64=5)) - assert (Series( - df._data.get_dtype_counts() - ).sort_index() == result).all() + result = df.dtypes + expected = Series([np.dtype('float64')] * 5 + [np.dtype('float32')], + index=['foo', 'c', 'bar', 'b', 'a', 'x']) + tm.assert_series_equal(result, expected) # replacing current (in different block) df['a'] = df['a'].astype('float32') - result = Series(dict(float32=2, float64=4)) - assert (Series( - df._data.get_dtype_counts() - ).sort_index() == result).all() + result = df.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('float32')] * 2, + index=['foo', 'c', 'bar', 'b', 'a', 'x']) + tm.assert_series_equal(result, expected) df['y'] = df['a'].astype('int32') - result = Series(dict(float32=2, float64=4, int32=1)) - assert (Series( - df._data.get_dtype_counts() - ).sort_index() == result).all() + result = df.dtypes + expected = Series([np.dtype('float64')] * 4 + + 
[np.dtype('float32')] * 2 + + [np.dtype('int32')], + index=['foo', 'c', 'bar', 'b', 'a', 'x', 'y']) + tm.assert_series_equal(result, expected) with pytest.raises(ValueError, match='already exists'): df.insert(1, 'a', df['b']) From 879485b342095f9cba4f0bfb415aa1f2eb0b1103 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 20:46:32 -0500 Subject: [PATCH 08/21] More progress --- pandas/tests/frame/test_api.py | 6 +++--- pandas/tests/series/test_dtypes.py | 6 ------ pandas/tests/sparse/frame/test_frame.py | 5 ++--- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 88cfd1a02c0e2..a6bbdd2e8ec14 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -7,8 +7,8 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, Series, SparseDataFrame, compat, date_range, - timedelta_range) + Categorical, DataFrame, Series, SparseDataFrame, SparseDtype, + compat, date_range, timedelta_range) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -437,7 +437,7 @@ def test_with_datetimelikes(self): if self.klass is DataFrame: expected = Series({np.dtype('object'): 10}) else: - expected = Series({'Sparse[object, nan]': 10}) + expected = Series({SparseDtype(dtype=object): 10}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 5d24d799972c8..c92f74d093f98 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -56,12 +56,6 @@ def test_dtype(self, datetime_series): # GH 26705 - Assert .ftypes is deprecated with tm.assert_produces_warning(FutureWarning): assert datetime_series.ftypes == 'float64:dense' - tm.assert_series_equal( - Series( - datetime_series._data.get_dtype_counts() - ), - Series(1, ['float64']) - ) # GH18243 - Assert .get_ftype_counts is 
deprecated with tm.assert_produces_warning(FutureWarning): tm.assert_series_equal(datetime_series.get_ftype_counts(), diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 81179959d3f13..710f5eeb8e1bd 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -292,9 +292,8 @@ def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan sdf = df.to_sparse() - - result = Series(sdf._data.get_dtype_counts()) - expected = Series({'Sparse[float64, nan]': 4}) + result = sdf.dtypes + expected = Series(['Sparse[float64, nan]'] * 4) tm.assert_series_equal(result, expected) def test_shape(self, float_frame, float_frame_int_kind, From 0c82f16818eaa74ff8663603ceb59d3495e66317 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 20:53:09 -0500 Subject: [PATCH 09/21] convert more tests --- pandas/tests/groupby/test_apply.py | 7 +++---- pandas/tests/series/test_arithmetic.py | 14 ++++++-------- pandas/tests/sparse/frame/test_frame.py | 2 +- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index d71c2d5a346d6..8f57254eae219 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -179,10 +179,9 @@ def test_apply_with_mixed_dtype(): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 df = DataFrame({'foo1': np.random.randn(6), 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) - result = df.apply(lambda x: x, axis=1) - tm.assert_series_equal(Series(df._data.get_dtype_counts()), - Series(result._data.get_dtype_counts()) - ) + result = df.apply(lambda x: x, axis=1).dtypes + expected = df.dtypes + tm.assert_series_equal(result, expected) # GH 3610 incorrect dtype conversion with as_index=False df = DataFrame({"c1": [1, 2, 6, 6, 8]}) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py 
index 038a69864a0b5..2cc2ad080eb4c 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -122,11 +122,9 @@ def test_ser_flex_cmp_return_dtypes(self, opname): # GH#15115 ser = Series([1, 3, 2], index=range(3)) const = 2 - - result = Series( - getattr(ser, opname)(const)._data.get_dtype_counts() - ) - tm.assert_series_equal(result, Series([1], ['bool'])) + result = getattr(ser, opname)(const).dtypes + expected = np.dtype('bool') + assert result == expected @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_ser_flex_cmp_return_dtypes_empty(self, opname): @@ -134,9 +132,9 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname): ser = Series([1, 3, 2], index=range(3)) empty = ser.iloc[:0] const = 2 - - result = Series(getattr(empty, opname)(const)._data.get_dtype_counts()) - tm.assert_series_equal(result, Series([1], ['bool'])) + result = getattr(empty, opname)(const).dtypes + expected = np.dtype('bool') + assert result == expected @pytest.mark.parametrize('op', [operator.eq, operator.ne, operator.le, operator.lt, diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 710f5eeb8e1bd..d3e2e1357f9d7 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -901,7 +901,7 @@ def test_corr(self, float_frame): def test_describe(self, float_frame): float_frame['foo'] = np.nan - Series(float_frame._data.get_dtype_counts()) + float_frame.dtypes.value_counts() str(float_frame) desc = float_frame.describe() # noqa From 525fe513818d75995e685f90479bc8fdcd032e41 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 21:20:21 -0500 Subject: [PATCH 10/21] Converting --- pandas/tests/frame/test_constructors.py | 120 ++++++++++-------------- pandas/tests/groupby/test_groupby.py | 9 +- 2 files changed, 57 insertions(+), 72 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py 
b/pandas/tests/frame/test_constructors.py index 7d6e7123fb04e..50a28a701af2b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1579,10 +1579,11 @@ def test_constructor_with_datetimes(self): 'D': Timestamp("20010101"), 'E': datetime(2001, 1, 2, 0, 0)}, index=np.arange(10)) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 1, datetime64name: 2, objectname: 2}) - result.sort_index() - expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('int64')] + + [np.dtype(objectname)] * 2 + + [np.dtype(datetime64name)] * 2, + index=list("ABCDE")) tm.assert_series_equal(result, expected) # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0 @@ -1591,21 +1592,13 @@ def test_constructor_with_datetimes(self): floatname: np.array(1., dtype=floatname), intname: np.array(1, dtype=intname)}, index=np.arange(10)) - result = Series(df._data.get_dtype_counts()) - expected = {objectname: 1} - if intname == 'int64': - expected['int64'] = 2 - else: - expected['int64'] = 1 - expected[intname] = 1 - if floatname == 'float64': - expected['float64'] = 2 - else: - expected['float64'] = 1 - expected[floatname] = 1 - - result = result.sort_index() - expected = Series(expected).sort_index() + result = df.dtypes + expected = Series([np.dtype('float64')] + + [np.dtype('int64')] + + [np.dtype('object')] + + [np.dtype('float64')] + + [np.dtype('int64')], + index=['a', 'b', 'c', floatname, intname]) tm.assert_series_equal(result, expected) # check with ndarray construction ndim>0 @@ -1613,8 +1606,13 @@ def test_constructor_with_datetimes(self): floatname: np.array([1.] 
* 10, dtype=floatname), intname: np.array([1] * 10, dtype=intname)}, index=np.arange(10)) - result = Series(df._data.get_dtype_counts()) - result = result.sort_index() + result = df.dtypes + expected = Series([np.dtype('float64')] + + [np.dtype('int64')] + + [np.dtype('object')] + + [np.dtype('float64')] + + [np.dtype('int64')], + index=['a', 'b', 'c', floatname, intname]) tm.assert_series_equal(result, expected) # GH 2809 @@ -1622,22 +1620,15 @@ def test_constructor_with_datetimes(self): datetimes = [ts.to_pydatetime() for ts in ind] datetime_s = Series(datetimes) assert datetime_s.dtype == 'M8[ns]' - df = DataFrame({'datetime_s': datetime_s}) - result = Series(df._data.get_dtype_counts()) - expected = Series({datetime64name: 1}) - result = result.sort_index() - expected = expected.sort_index() - tm.assert_series_equal(result, expected) # GH 2810 ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] dates = [ts.date() for ts in ind] df = DataFrame({'datetimes': datetimes, 'dates': dates}) - result = Series(df._data.get_dtype_counts()) - expected = Series({datetime64name: 1, objectname: 1}) - result = result.sort_index() - expected = expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('datetime64[ns]'), np.dtype('object')], + index=['datetimes', 'dates']) tm.assert_series_equal(result, expected) # GH 7594 @@ -1693,8 +1684,8 @@ def test_constructor_datetimes_with_nulls(self): for arr in [np.array([None, None, None, None, datetime.now(), None]), np.array([None, None, datetime.now(), None])]: - result = Series(DataFrame(arr)._data.get_dtype_counts()) - expected = Series({'datetime64[ns]': 1}) + result = DataFrame(arr).dtypes + expected = Series([np.dtype('datetime64[ns]')]) tm.assert_series_equal(result, expected) def test_constructor_for_list_with_dtypes(self): @@ -1706,62 +1697,52 @@ def test_constructor_for_list_with_dtypes(self): # test list of lists/ndarrays df = DataFrame([np.arange(5) for 
x in range(5)]) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 5}) + result = df.dtypes + expected = Series([np.dtype('int64')] * 5) + tm.assert_series_equal(result, expected) df = DataFrame([np.array(np.arange(5), dtype='int32') for x in range(5)]) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int32': 5}) + result = df.dtypes + expected = Series([np.dtype('int64')] * 5) + tm.assert_series_equal(result, expected) # overflow issue? (we always expecte int64 upcasting here) df = DataFrame({'a': [2 ** 31, 2 ** 31 + 1]}) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') # GH #2751 (construction with no index specified), make sure we cast to # platform values df = DataFrame([1, 2]) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') df = DataFrame([1., 2.]) - result = Series(df._data.get_dtype_counts()) - expected = Series({'float64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('float64') df = DataFrame({'a': [1, 2]}) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') df = DataFrame({'a': [1., 2.]}) - result = Series(df._data.get_dtype_counts()) - expected = Series({'float64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('float64') df = DataFrame({'a': 1}, index=range(3)) - result = Series(df._data.get_dtype_counts()) - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') df = DataFrame({'a': 1.}, index=range(3)) - result = Series(df._data.get_dtype_counts()) - expected = Series({'float64': 1}) - 
tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('float64') # with object list df = DataFrame({'a': [1, 2, 4, 7], 'b': [1.2, 2.3, 5.1, 6.3], 'c': list('abcd'), 'd': [datetime(2000, 1, 1) for i in range(4)], 'e': [1., 2, 4., 7]}) - result = Series(df._data.get_dtype_counts()) - expected = Series( - {'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1}) - result = result.sort_index() - expected = expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('int64'), + np.dtype('float64'), + np.dtype('object'), + np.dtype('datetime64[ns]'), + np.dtype('float64')], + index=list('abcde')) tm.assert_series_equal(result, expected) def test_constructor_frame_copy(self, float_frame): @@ -2077,16 +2058,19 @@ def test_from_records_misc_brokenness(self): rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 'hi']) # test col upconverts to obj df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = Series(df2_obj._data.get_dtype_counts()) - expected = Series({'datetime64[ns]': 1, 'object': 1}) + result = df2_obj.dtypes + expected = Series([np.dtype('datetime64[ns]'), np.dtype('object')], + index=['date', 'test']) + tm.assert_series_equal(result, expected) rows = [] rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 1]) df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = Series(df2_obj._data.get_dtype_counts()).sort_index() - expected = Series({'datetime64[ns]': 1, 'int64': 1}) - tm.assert_series_equal(results, expected) + result = df2_obj.dtypes + expected = Series([np.dtype('datetime64[ns]'), np.dtype('int64')], + index=['date', 'test']) + tm.assert_series_equal(result, expected) def test_from_records_empty(self): # 3562 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 458aab7d11b24..d13dddac79042 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ 
-88,10 +88,11 @@ def max_value(group): return group.loc[group['value'].idxmax()] applied = df.groupby('A').apply(max_value) - result = Series(applied._data.get_dtype_counts()).sort_values() - expected = Series({'float64': 2, - 'int64': 1, - 'object': 2}).sort_values() + result = applied.dtypes + expected = Series([np.dtype('object')] * 2 + + [np.dtype('float64')] * 2 + + [np.dtype('int64')], + index=['A', 'B', 'C', 'D', 'value']) assert_series_equal(result, expected) From 592659f0ce190979e3d16f6814c3b66ffe50190d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 21:44:51 -0500 Subject: [PATCH 11/21] Convert more tests --- pandas/tests/frame/test_missing.py | 8 +++----- pandas/tests/io/pytables/test_pytables.py | 5 ++--- pandas/tests/reshape/test_pivot.py | 10 ++++++---- pandas/tests/reshape/test_reshape.py | 15 +++++++-------- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index a6c5ea8f57d70..0649d00043994 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -407,15 +407,13 @@ def test_fillna_downcast(self): def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) - result = Series(df._data.get_dtype_counts()).sort_values() - expected = Series({'object': 5}) + result = df.dtypes + expected = Series([np.dtype('object')] * 5, index=[1, 2, 3, 4, 5]) assert_series_equal(result, expected) result = df.fillna(1) expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) - result = Series(result._data.get_dtype_counts()).sort_values() - expected = Series({'int64': 5}) - assert_series_equal(result, expected) + assert_frame_equal(result, expected) # empty block df = DataFrame(index=range(3), columns=['A', 'B'], dtype='float64') diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py 
index 8a7514ee31017..bf3771f988844 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -1985,9 +1985,8 @@ def test_table_values_dtypes_roundtrip(self): df1['time2'] = Timestamp('20130102') store.append('df_mixed_dtypes1', df1) - result = Series( - store.select('df_mixed_dtypes1')._data.get_dtype_counts() - ) + result = store.select('df_mixed_dtypes1').dtypes.value_counts() + result.index = [str(i) for i in result.index] expected = Series({'float32': 2, 'float64': 1, 'int32': 1, 'bool': 1, 'int16': 1, 'int8': 1, 'int64': 1, 'object': 1, 'datetime64[ns]': 2}) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 29e684b00f8b8..2ad3fa5f035c2 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -245,8 +245,9 @@ def test_pivot_dtypes(self): z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.sum) - result = Series(z._data.get_dtype_counts()) - expected = Series(dict(int64=2)) + result = z.dtypes + expected = Series([np.dtype('int64')] * 2, + index=Index(list('ab'), name='i')) tm.assert_series_equal(result, expected) # cannot convert dtypes @@ -256,8 +257,9 @@ def test_pivot_dtypes(self): z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.mean) - result = Series(z._data.get_dtype_counts()) - expected = Series(dict(float64=2)) + result = z.dtypes + expected = Series([np.dtype('float64')] * 2, + index=Index(list('ab'), name='i')) tm.assert_series_equal(result, expected) @pytest.mark.parametrize('columns,values', diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 94c69f0e947a7..d0979fb86d36d 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -101,8 +101,9 @@ def test_basic_types(self, sparse, dtype): dtype_name = self.effective_dtype(dtype).name expected = Series({dtype_name: 8}) - 
tm.assert_series_equal(Series(result._data.get_dtype_counts()), - expected) + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + tm.assert_series_equal(result, expected) result = get_dummies(s_df, columns=['a'], sparse=sparse, dtype=dtype) @@ -110,12 +111,10 @@ def test_basic_types(self, sparse, dtype): expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) expected = Series(expected_counts).sort_index() - tm.assert_series_equal( - Series( - result._data.get_dtype_counts() - ).sort_index(), - expected - ) + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + result = result.sort_index() + tm.assert_series_equal(result, expected) def test_just_na(self, sparse): just_na_list = [np.nan] From 0df4dd946402d21308a4b9f248a0a235c60676cc Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 22:00:36 -0500 Subject: [PATCH 12/21] convert tests --- pandas/tests/frame/test_dtypes.py | 25 ++++++++++++--------- pandas/tests/frame/test_indexing.py | 35 ++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index dad2e5db14d24..51578ba20b047 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -836,23 +836,28 @@ def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), B=Series([timedelta(days=i) for i in range(3)]))) - result = Series(df._data.get_dtype_counts()).sort_index() - expected = Series( - {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_index() + result = df.dtypes + expected = Series([np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]')], + index=list("AB")) assert_series_equal(result, expected) df['C'] = df['A'] + df['B'] - expected = Series( - {'datetime64[ns]': 2, 'timedelta64[ns]': 1}).sort_values() - result = Series(df._data.get_dtype_counts()).sort_values() + result = 
df.dtypes + expected = Series([np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]'), + np.dtype('datetime64[ns]')], + index=list("ABC")) assert_series_equal(result, expected) # mixed int types df['D'] = 1 - expected = Series({'datetime64[ns]': 2, - 'timedelta64[ns]': 1, - 'int64': 1}).sort_values() - result = Series(df._data.get_dtype_counts()).sort_values() + result = df.dtypes + expected = Series([np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]'), + np.dtype('datetime64[ns]'), + np.dtype('int64')], + index=list("ABCD")) assert_series_equal(result, expected) def test_arg_for_errors_in_astype(self): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ef589a125dee6..f8af942f67657 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -300,15 +300,23 @@ def test_getitem_boolean_casting(self, datetime_frame): df['F1'] = df['F'].copy() casted = df[df > 0] - result = Series(casted._data.get_dtype_counts()) - expected = Series({'float64': 4, 'int32': 2, 'int64': 2}) + result = casted.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('int32')] * 2 + + [np.dtype('int64')] * 2, + index=['A', 'B', 'C', 'D', 'E', 'E1', 'F', 'F1']) assert_series_equal(result, expected) # int block splitting df.loc[df.index[1:3], ['E1', 'F1']] = 0 casted = df[df > 0] - result = Series(casted._data.get_dtype_counts()) - expected = Series({'float64': 6, 'int32': 1, 'int64': 1}) + result = casted.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('int32')] + + [np.dtype('float64')] + + [np.dtype('int64')] + + [np.dtype('float64')], + index=['A', 'B', 'C', 'D', 'E', 'E1', 'F', 'F1']) assert_series_equal(result, expected) # where dtype conversions @@ -615,8 +623,9 @@ def test_setitem_cast(self, float_frame): df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC')) df['event'] = np.nan df.loc[10, 'event'] = 'foo' - result = Series(df._data.get_dtype_counts()).sort_values() 
- expected = Series({'float64': 3, 'object': 1}).sort_values() + result = df.dtypes + expected = Series([np.dtype('float64')] * 3 + [np.dtype('object')], + index=['A', 'B', 'C', 'event']) assert_series_equal(result, expected) # Test that data type is preserved . #5782 @@ -1614,8 +1623,10 @@ def test_setitem_single_column_mixed_datetime(self): df['timestamp'] = Timestamp('20010102') # check our dtypes - result = Series(df._data.get_dtype_counts()) - expected = Series({'float64': 3, 'datetime64[ns]': 1}) + result = df.dtypes + expected = Series([np.dtype('float64')] * 3 + + [np.dtype('datetime64[ns]')], + index=['foo', 'bar', 'baz', 'timestamp']) assert_series_equal(result, expected) # set an allowable datetime64 type @@ -2637,13 +2648,17 @@ def _check_get(df, cond, check_dtypes=True): for c in ['float32', 'float64', 'int32', 'int64']}) df.iloc[1, :] = 0 - result = Series(df.where(df >= 0)._data.get_dtype_counts()) + result = df.dtypes + expected = Series([np.dtype('float32'), + np.dtype('float64'), + np.dtype('int32'), + np.dtype('int64')], + index=['float32', 'float64', 'int32', 'int64']) # when we don't preserve boolean casts # # expected = Series({ 'float32' : 1, 'float64' : 3 }) - expected = Series({'float32': 1, 'float64': 1, 'int32': 1, 'int64': 1}) assert_series_equal(result, expected) # aligning From eba5396ffebb4f476f64a3ffa105a31211026fda Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Jun 2019 22:01:43 -0500 Subject: [PATCH 13/21] flake8 --- pandas/tests/frame/test_combine_concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 290d5459dc26f..c1d057da91b8f 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -18,7 +18,8 @@ def test_concat_multiple_frames_dtypes(self): 'foo', 'bar'], dtype=np.float64) B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) results = pd.concat((A, 
B), axis=1).dtypes - expected = Series([np.dtype('float64')] * 2 + [np.dtype('float32')] * 2, + expected = Series([np.dtype('float64')] * 2 + + [np.dtype('float32')] * 2, index=['foo', 'bar', 0, 1]) assert_series_equal(results, expected) From 4ce119490f6521091b859de383caa41dabd08b74 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Jul 2019 10:56:43 -0700 Subject: [PATCH 14/21] update docstring --- pandas/core/computation/expressions.py | 6 +++--- pandas/core/generic.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index e40669ae69f6f..e256e9ed44f9a 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -79,11 +79,11 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, '_data'): - s = o._data.get_dtype_counts() + if hasattr(o, 'dtypes'): + s = o.dtpyes.value_counts() if len(s) > 1: return False - dtypes |= set(s.keys()) + dtypes |= set(s.index.astype(str)) elif isinstance(o, np.ndarray): dtypes |= {o.dtype.name} diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9a274764e8f76..a511bade5eb40 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5275,6 +5275,8 @@ def get_dtype_counts(self): """ Return counts of unique dtypes in this object. + .. 
deprecated:: 0.25.0 + Returns ------- dtype : Series From cb6c8bd792cb06865c481169693c4d089113339b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Jul 2019 11:58:20 -0700 Subject: [PATCH 15/21] Address some failures --- doc/source/getting_started/basics.rst | 4 ++-- pandas/tests/frame/test_constructors.py | 8 +------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 3ba79210a43ee..e1508cb7b4e16 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -1968,11 +1968,11 @@ dtype of the column will be chosen to accommodate all of the data types pd.Series([1, 2, 3, 6., 'foo']) The number of columns of each type in a ``DataFrame`` can be found by calling -:meth:`~DataFrame.get_dtype_counts`. +``DataFrame.dtypes.value_counts()``. .. ipython:: python - dft.get_dtype_counts() + dft.dtypes.value_counts() Numeric dtypes will propagate and can coexist in DataFrames. 
If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 50a28a701af2b..8f4c926b6e760 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1597,7 +1597,7 @@ def test_constructor_with_datetimes(self): [np.dtype('int64')] + [np.dtype('object')] + [np.dtype('float64')] + - [np.dtype('int64')], + [np.dtype(intname)], index=['a', 'b', 'c', floatname, intname]) tm.assert_series_equal(result, expected) @@ -1689,12 +1689,6 @@ def test_constructor_datetimes_with_nulls(self): tm.assert_series_equal(result, expected) def test_constructor_for_list_with_dtypes(self): - # TODO(wesm): unused - intname = np.dtype(np.int_).name # noqa - floatname = np.dtype(np.float_).name # noqa - datetime64name = np.dtype('M8[ns]').name - objectname = np.dtype(np.object_).name - # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)]) result = df.dtypes From abe310c0f3af9e7c94cea2ad65d3f045ce3d043f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Jul 2019 16:00:15 -0700 Subject: [PATCH 16/21] typo --- pandas/core/computation/expressions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index e256e9ed44f9a..b01000a7aee5b 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -80,7 +80,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): dtypes = set() for o in [a, b]: if hasattr(o, 'dtypes'): - s = o.dtpyes.value_counts() + s = o.dtypes.value_counts() if len(s) > 1: return False dtypes |= set(s.index.astype(str)) From 6cb5d13d49fe4bddffe1316261b68efdac5063fd Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Jul 2019 21:01:32 -0700 Subject: [PATCH 17/21] isort --- pandas/tests/frame/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 71df56f418e0e..6372029f2efe7 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -7,8 +7,8 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, Series, SparseDataFrame, SparseDtype, - compat, date_range, timedelta_range) + Categorical, DataFrame, Series, SparseDataFrame, SparseDtype, compat, + date_range, timedelta_range) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) From ca70a46015178f4f420ec715aed06a847b446ff6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Jul 2019 21:06:15 -0700 Subject: [PATCH 18/21] Fix doc warnings --- doc/source/user_guide/io.rst | 2 +- doc/source/user_guide/missing_data.rst | 2 +- doc/source/whatsnew/v0.10.1.rst | 2 +- doc/source/whatsnew/v0.11.0.rst | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 9af6c36cc4e4d..fc7b1c00fd543 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3755,7 +3755,7 @@ defaults to `nan`. store.append('df_mixed', df_mixed, min_itemsize={'values': 50}) df_mixed1 = store.select('df_mixed') df_mixed1 - df_mixed1.get_dtype_counts() + df_mixed1.dtypes.value_counts() # we have provided a minimum string column size store.root.df_mixed.table diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index ef77826e9a444..6c36a6470f841 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -105,7 +105,7 @@ pandas objects provide compatibility between ``NaT`` and ``NaN``. df2 df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan df2 - df2.get_dtype_counts() + df2.dtypes.value_counts() .. 
_missing.inserting: diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst index 7d51ded1cad19..c4251f70d85b6 100644 --- a/doc/source/whatsnew/v0.10.1.rst +++ b/doc/source/whatsnew/v0.10.1.rst @@ -89,7 +89,7 @@ You can now store ``datetime64`` in data columns store.append('df_mixed', df_mixed) df_mixed1 = store.select('df_mixed') df_mixed1 - df_mixed1.get_dtype_counts() + df_mixed1.dtypes.value_counts() You can pass ``columns`` keyword to select to filter a list of the return columns, this is equivalent to passing a diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst index 31fab6c9aeb74..03480ebeed78e 100644 --- a/doc/source/whatsnew/v0.11.0.rst +++ b/doc/source/whatsnew/v0.11.0.rst @@ -296,7 +296,7 @@ Furthermore ``datetime64[ns]`` columns are created by default, when passed datet df # datetime64[ns] out of the box - df.get_dtype_counts() + df.dtypes.value_counts() # use the traditional nan, which is mapped to NaT internally df.loc[df.index[2:4], ['A', 'timestamp']] = np.nan From 86c0a08d08f8d48079a4278561c81fc0af6c67e3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Jul 2019 21:17:37 -0700 Subject: [PATCH 19/21] Edit tests for dict sorting and platform ints --- pandas/tests/frame/test_constructors.py | 5 +++-- pandas/tests/frame/test_timezones.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8f4c926b6e760..73a8720adb5cc 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1611,7 +1611,7 @@ def test_constructor_with_datetimes(self): [np.dtype('int64')] + [np.dtype('object')] + [np.dtype('float64')] + - [np.dtype('int64')], + [np.dtype(intname)], index=['a', 'b', 'c', floatname, intname]) tm.assert_series_equal(result, expected) @@ -1625,7 +1625,8 @@ def test_constructor_with_datetimes(self): ind = date_range(start="2000-01-01", freq="D", 
periods=10) datetimes = [ts.to_pydatetime() for ts in ind] dates = [ts.date() for ts in ind] - df = DataFrame({'datetimes': datetimes, 'dates': dates}) + df = DataFrame(datetimes, columns=['datetimes']) + df['dates'] = dates result = df.dtypes expected = Series([np.dtype('datetime64[ns]'), np.dtype('object')], index=['datetimes', 'dates']) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 1703fceeba9cf..b7c73daae0002 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -150,10 +150,10 @@ def test_frame_no_datetime64_dtype(self, tz): # GH#2810 (with timezones) datetimes_naive = [ts.to_pydatetime() for ts in dr] datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] - df = DataFrame({'dr': dr, - 'dr_tz': dr_tz, - 'datetimes_naive': datetimes_naive, - 'datetimes_with_tz': datetimes_with_tz}) + df = DataFrame({'dr': dr}) + df['dr_tz'] = dr_tz + df['datetimes_naive'] = datetimes_naive + df['datetimes_with_tz'] = datetimes_with_tz result = df.dtypes expected = Series([ np.dtype('datetime64[ns]'), From 852bd649273264407d57a89bb2751ddbd20d8d0a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 2 Jul 2019 11:24:14 -0700 Subject: [PATCH 20/21] Add alternative method --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6ccd577e8ed07..703b631d83669 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5291,7 +5291,8 @@ def get_dtype_counts(self): dtype: int64 """ warnings.warn("`get_dtype_counts` has been deprecated and will be " - "removed in a future version.", FutureWarning, + "removed in a future version. 
For DataFrames use " + "`.dtypes.value_counts()`.", FutureWarning, stacklevel=2) from pandas import Series return Series(self._data.get_dtype_counts()) From e700c6331abd47d93d63df76f1a586f12d714b28 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 2 Jul 2019 13:55:58 -0700 Subject: [PATCH 21/21] Add doc note --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 703b631d83669..0679aa27b1ad3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5265,6 +5265,8 @@ def get_dtype_counts(self): .. deprecated:: 0.25.0 + Use `.dtypes.value_counts()` instead. + Returns ------- dtype : Series