diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 6cc34770a65e0..e082629a5433d 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -256,36 +256,40 @@ def test_constructor_with_generator(self): cat = Categorical([0, 1, 2], categories=xrange(3)) tm.assert_categorical_equal(cat, exp) - def test_constructor_with_datetimelike(self): - - # 12077 - # constructor wwth a datetimelike and NaT - - for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'), - date_range('1995-01-01 00:00:00', periods=5, - freq='s', tz='US/Eastern'), - timedelta_range('1 day', periods=5, freq='s')]: - - s = Series(dtl) - c = Categorical(s) - expected = type(dtl)(s) - expected.freq = None - tm.assert_index_equal(c.categories, expected) - tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8')) - - # with NaT - s2 = s.copy() - s2.iloc[-1] = NaT - c = Categorical(s2) - expected = type(dtl)(s2.dropna()) - expected.freq = None - tm.assert_index_equal(c.categories, expected) - - exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) - tm.assert_numpy_array_equal(c.codes, exp) - - result = repr(c) - assert 'NaT' in result + @pytest.mark.parametrize("dtl", [ + date_range("1995-01-01 00:00:00", periods=5, freq="s"), + date_range("1995-01-01 00:00:00", periods=5, + freq="s", tz="US/Eastern"), + timedelta_range("1 day", periods=5, freq="s") + ]) + def test_constructor_with_datetimelike(self, dtl): + # see gh-12077 + # constructor with a datetimelike and NaT + + s = Series(dtl) + c = Categorical(s) + + expected = type(dtl)(s) + expected.freq = None + + tm.assert_index_equal(c.categories, expected) + tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) + + # with NaT + s2 = s.copy() + s2.iloc[-1] = NaT + c = Categorical(s2) + + expected = type(dtl)(s2.dropna()) + expected.freq = None + + tm.assert_index_equal(c.categories, expected) + + exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) + tm.assert_numpy_array_equal(c.codes, exp) + + result = repr(c) + assert "NaT" in result def test_constructor_from_index_series_datetimetz(self): idx = date_range('2015-01-01 10:00', freq='D', periods=3, diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index edabc177535fc..a7a9faa9e77eb 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -82,25 +82,18 @@ def test_dtype_equal(name1, dtype1, name2, dtype2): assert not com.is_dtype_equal(dtype1, dtype2) -def test_dtype_equal_strict(): - - # we are strict on kind equality - for dtype in [np.int8, np.int16, np.int32]: - assert not com.is_dtype_equal(np.int64, dtype) - - for dtype in [np.float32]: - assert not com.is_dtype_equal(np.float64, dtype) - - # strict w.r.t. PeriodDtype - assert not com.is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D')) - - # strict w.r.t. datetime64 - assert not com.is_dtype_equal( - com.pandas_dtype('datetime64[ns, US/Eastern]'), - com.pandas_dtype('datetime64[ns, CET]')) - - # see gh-15941: no exception should be raised - assert not com.is_dtype_equal(None, None) +@pytest.mark.parametrize("dtype1,dtype2", [ + (np.int8, np.int64), + (np.int16, np.int64), + (np.int32, np.int64), + (np.float32, np.float64), + (PeriodDtype("D"), PeriodDtype("2D")), # PeriodType + (com.pandas_dtype("datetime64[ns, US/Eastern]"), + com.pandas_dtype("datetime64[ns, CET]")), # Datetime + (None, None) # gh-15941: no exception should be raised. +]) +def test_dtype_equal_strict(dtype1, dtype2): + assert not com.is_dtype_equal(dtype1, dtype2) def get_is_dtype_funcs(): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 2eed6b47df9e3..3e5c13208f164 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -85,12 +85,15 @@ def test_get(self): assert self.frame.get('foo') is None assert_series_equal(self.frame.get('foo', self.frame['B']), self.frame['B']) - # None - # GH 5652 - for df in [DataFrame(), DataFrame(columns=list('AB')), - DataFrame(columns=list('AB'), index=range(3))]: - result = df.get(None) - assert result is None + + @pytest.mark.parametrize("df", [ + DataFrame(), + DataFrame(columns=list("AB")), + DataFrame(columns=list("AB"), index=range(3)) + ]) + def test_get_none(self, df): + # see gh-5652 + assert df.get(None) is None def test_loc_iterable(self): idx = iter(['A', 'B', 'C']) @@ -472,11 +475,6 @@ def f(): assert smaller['col10'].dtype == np.object_ assert (smaller['col10'] == ['1', '2']).all() - # with a dtype - for dtype in ['int32', 'int64', 'float32', 'float64']: - self.frame[dtype] = np.array(arr, dtype=dtype) - assert self.frame[dtype].dtype.name == dtype - # dtype changing GH4204 df = DataFrame([[0, 0]]) df.iloc[0] = np.nan @@ -487,6 +485,13 @@ def f(): df.loc[0] = np.nan assert_frame_equal(df, expected) + @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) + def test_setitem_dtype(self, dtype): + arr = randn(len(self.frame)) + + self.frame[dtype] = np.array(arr, dtype=dtype) + assert self.frame[dtype].dtype.name == dtype + def test_setitem_tuple(self): self.frame['A', 'B'] = self.frame['A'] assert_series_equal(self.frame['A', 'B'], self.frame[ @@ -746,18 +751,20 @@ def test_setitem_empty(self): result.loc[result.b.isna(), 'a'] = result.a assert_frame_equal(result, df) - def test_setitem_empty_frame_with_boolean(self): - # Test for issue #10126 + @pytest.mark.parametrize("dtype", ["float", "int64"]) + @pytest.mark.parametrize("kwargs", [ + dict(), + dict(index=[1]), + dict(columns=["A"]) + ]) + def test_setitem_empty_frame_with_boolean(self, dtype, kwargs): + # see gh-10126 + kwargs["dtype"] = dtype + df = DataFrame(**kwargs) - for dtype in ('float', 'int64'): - for df in [ - pd.DataFrame(dtype=dtype), - pd.DataFrame(dtype=dtype, index=[1]), - pd.DataFrame(dtype=dtype, columns=['A']), - ]: - df2 = df.copy() - df[df > df2] = 47 - assert_frame_equal(df, df2) + df2 = df.copy() + df[df > df2] = 47 + assert_frame_equal(df, df2) def test_setitem_scalars_no_index(self): # GH16823 / 17894 @@ -2007,48 +2014,54 @@ def test_nested_exception(self): except Exception as e: assert type(e) != UnboundLocalError - def test_reindex_methods(self): - df = pd.DataFrame({'x': list(range(5))}) + @pytest.mark.parametrize("method,expected_values", [ + ("nearest", [0, 1, 1, 2]), + ("pad", [np.nan, 0, 1, 1]), + ("backfill", [0, 1, 2, 2]) + ]) + def test_reindex_methods(self, method, expected_values): + df = pd.DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = pd.DataFrame({'x': expected_values}, index=target) + actual = df.reindex(target, method=method) + assert_frame_equal(expected, actual) + + actual = df.reindex_like(df, method=method, tolerance=0) + assert_frame_equal(df, actual) + actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + assert_frame_equal(df, actual) + + actual = df.reindex(target, method=method, tolerance=1) + assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + assert_frame_equal(expected, actual) + + e2 = expected[::-1] + actual = df.reindex(target[::-1], method=method) + assert_frame_equal(e2, actual) + + new_order = [3, 0, 2, 1] + e2 = expected.iloc[new_order] + actual = df.reindex(target[new_order], method=method) + assert_frame_equal(e2, actual) + + switched_method = ('pad' if method == 'backfill' + else 'backfill' if method == 'pad' + else method) + actual = df[::-1].reindex(target, method=switched_method) + assert_frame_equal(expected, actual) + + def test_reindex_methods_nearest_special(self): + df = pd.DataFrame({"x": list(range(5))}) target = np.array([-0.1, 0.9, 1.1, 1.5]) - for method, expected_values in [('nearest', [0, 1, 1, 2]), - ('pad', [np.nan, 0, 1, 1]), - ('backfill', [0, 1, 2, 2])]: - expected = pd.DataFrame({'x': expected_values}, index=target) - actual = df.reindex(target, method=method) - assert_frame_equal(expected, actual) - - actual = df.reindex_like(df, method=method, tolerance=0) - assert_frame_equal(df, actual) - actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) - assert_frame_equal(df, actual) - - actual = df.reindex(target, method=method, tolerance=1) - assert_frame_equal(expected, actual) - actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) - assert_frame_equal(expected, actual) - - e2 = expected[::-1] - actual = df.reindex(target[::-1], method=method) - assert_frame_equal(e2, actual) - - new_order = [3, 0, 2, 1] - e2 = expected.iloc[new_order] - actual = df.reindex(target[new_order], method=method) - assert_frame_equal(e2, actual) - - switched_method = ('pad' if method == 'backfill' - else 'backfill' if method == 'pad' - else method) - actual = df[::-1].reindex(target, method=switched_method) - assert_frame_equal(expected, actual) - - expected = pd.DataFrame({'x': [0, 1, 1, np.nan]}, index=target) - actual = df.reindex(target, method='nearest', tolerance=0.2) + expected = pd.DataFrame({"x": [0, 1, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=0.2) assert_frame_equal(expected, actual) - expected = pd.DataFrame({'x': [0, np.nan, 1, np.nan]}, index=target) - actual = df.reindex(target, method='nearest', + expected = pd.DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=[0.5, 0.01, 0.4, 0.1]) assert_frame_equal(expected, actual) @@ -2599,59 +2612,56 @@ def _check_set(df, cond, check_dtypes=True): expected = df[df['a'] == 1].reindex(df.index) assert_frame_equal(result, expected) - def test_where_array_like(self): + @pytest.mark.parametrize("klass", [list, tuple, np.array]) + def test_where_array_like(self, klass): # see gh-15414 - klasses = [list, tuple, np.array] - - df = DataFrame({'a': [1, 2, 3]}) + df = DataFrame({"a": [1, 2, 3]}) cond = [[False], [True], [True]] - expected = DataFrame({'a': [np.nan, 2, 3]}) + expected = DataFrame({"a": [np.nan, 2, 3]}) - for klass in klasses: - result = df.where(klass(cond)) - assert_frame_equal(result, expected) + result = df.where(klass(cond)) + assert_frame_equal(result, expected) - df['b'] = 2 - expected['b'] = [2, np.nan, 2] + df["b"] = 2 + expected["b"] = [2, np.nan, 2] cond = [[False, True], [True, False], [True, True]] - for klass in klasses: - result = df.where(klass(cond)) - assert_frame_equal(result, expected) + result = df.where(klass(cond)) + assert_frame_equal(result, expected) - def test_where_invalid_input(self): + @pytest.mark.parametrize("cond", [ + [[1], [0], [1]], + Series([[2], [5], [7]]), + DataFrame({"a": [2, 5, 7]}), + [["True"], ["False"], ["True"]], + [[Timestamp("2017-01-01")], + [pd.NaT], [Timestamp("2017-01-02")]] + ]) + def test_where_invalid_input_single(self, cond): # see gh-15414: only boolean arrays accepted - df = DataFrame({'a': [1, 2, 3]}) + df = DataFrame({"a": [1, 2, 3]}) msg = "Boolean array expected for the condition" - conds = [ - [[1], [0], [1]], - Series([[2], [5], [7]]), - DataFrame({'a': [2, 5, 7]}), - [["True"], ["False"], ["True"]], - [[Timestamp("2017-01-01")], - [pd.NaT], [Timestamp("2017-01-02")]] - ] - - for cond in conds: - with tm.assert_raises_regex(ValueError, msg): - df.where(cond) - - df['b'] = 2 - conds = [ - [[0, 1], [1, 0], [1, 1]], - Series([[0, 2], [5, 0], [4, 7]]), - [["False", "True"], ["True", "False"], - ["True", "True"]], - DataFrame({'a': [2, 5, 7], 'b': [4, 8, 9]}), - [[pd.NaT, Timestamp("2017-01-01")], - [Timestamp("2017-01-02"), pd.NaT], - [Timestamp("2017-01-03"), Timestamp("2017-01-03")]] - ] - - for cond in conds: - with tm.assert_raises_regex(ValueError, msg): - df.where(cond) + with tm.assert_raises_regex(ValueError, msg): + df.where(cond) + + @pytest.mark.parametrize("cond", [ + [[0, 1], [1, 0], [1, 1]], + Series([[0, 2], [5, 0], [4, 7]]), + [["False", "True"], ["True", "False"], + ["True", "True"]], + DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}), + [[pd.NaT, Timestamp("2017-01-01")], + [Timestamp("2017-01-02"), pd.NaT], + [Timestamp("2017-01-03"), Timestamp("2017-01-03")]] + ]) + def test_where_invalid_input_multiple(self, cond): + # see gh-15414: only boolean arrays accepted + df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]}) + msg = "Boolean array expected for the condition" + + with tm.assert_raises_regex(ValueError, msg): + df.where(cond) def test_where_dataframe_col_match(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) @@ -2690,9 +2700,7 @@ def test_where_ndframe_align(self): tm.assert_frame_equal(out, expected) def test_where_bug(self): - - # GH 2793 - + # see gh-2793 df = DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [ 4.0, 3.0, 2.0, 1.0]}, dtype='float64') expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0], 'b': [ @@ -2704,25 +2712,25 @@ def test_where_bug(self): result.where(result > 2, np.nan, inplace=True) assert_frame_equal(result, expected) - # mixed - for dtype in ['int16', 'int8', 'int32', 'int64']: - df = DataFrame({'a': np.array([1, 2, 3, 4], dtype=dtype), - 'b': np.array([4.0, 3.0, 2.0, 1.0], - dtype='float64')}) + def test_where_bug_mixed(self, sint_dtype): + # see gh-2793 + df = DataFrame({"a": np.array([1, 2, 3, 4], dtype=sint_dtype), + "b": np.array([4.0, 3.0, 2.0, 1.0], + dtype="float64")}) - expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0], - 'b': [4.0, 3.0, np.nan, np.nan]}, - dtype='float64') + expected = DataFrame({"a": [np.nan, np.nan, 3.0, 4.0], + "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64") - result = df.where(df > 2, np.nan) - assert_frame_equal(result, expected) + result = df.where(df > 2, np.nan) + assert_frame_equal(result, expected) - result = df.copy() - result.where(result > 2, np.nan, inplace=True) - assert_frame_equal(result, expected) + result = df.copy() + result.where(result > 2, np.nan, inplace=True) + assert_frame_equal(result, expected) - # transpositional issue - # GH7506 + def test_where_bug_transposition(self): + # see gh-7506 a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) do_not_replace = b.isna() | (a > b) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 9df362a8e132f..f8a0f1688c64e 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -66,32 +66,38 @@ def test_intercept_builtin_sum(): tm.assert_series_equal(result2, expected) -def test_builtins_apply(): # GH8155 +# @pytest.mark.parametrize("f", [max, min, sum]) +# def test_builtins_apply(f): + +@pytest.mark.parametrize("f", [max, min, sum]) +@pytest.mark.parametrize('keys', [ + "jim", # Single key + ["jim", "joe"] # Multi-key +]) +def test_builtins_apply(keys, f): + # see gh-8155 df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), - columns=['jim', 'joe']) - df['jolie'] = np.random.randn(1000) + columns=["jim", "joe"]) + df["jolie"] = np.random.randn(1000) + + fname = f.__name__ + result = df.groupby(keys).apply(f) + ngroups = len(df.drop_duplicates(subset=keys)) - for keys in ['jim', ['jim', 'joe']]: # single key & multi-key - if keys == 'jim': - continue - for f in [max, min, sum]: - fname = f.__name__ - result = df.groupby(keys).apply(f) - result.shape - ngroups = len(df.drop_duplicates(subset=keys)) - assert result.shape == (ngroups, 3), 'invalid frame shape: '\ - '{} (expected ({}, 3))'.format(result.shape, ngroups) + assert_msg = ("invalid frame shape: {} " + "(expected ({}, 3))".format(result.shape, ngroups)) + assert result.shape == (ngroups, 3), assert_msg - tm.assert_frame_equal(result, # numpy's equivalent function - df.groupby(keys).apply(getattr(np, fname))) + tm.assert_frame_equal(result, # numpy's equivalent function + df.groupby(keys).apply(getattr(np, fname))) - if f != sum: - expected = df.groupby(keys).agg(fname).reset_index() - expected.set_index(keys, inplace=True, drop=False) - tm.assert_frame_equal(result, expected, check_dtype=False) + if f != sum: + expected = df.groupby(keys).agg(fname).reset_index() + expected.set_index(keys, inplace=True, drop=False) + tm.assert_frame_equal(result, expected, check_dtype=False) - tm.assert_series_equal(getattr(result, fname)(), - getattr(df, fname)()) + tm.assert_series_equal(getattr(result, fname)(), + getattr(df, fname)()) def test_arg_passthru(): @@ -365,34 +371,34 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data): tm.assert_frame_equal(t, df_out) -def test_groupby_non_arithmetic_agg_intlike_precision(): - # GH9311, GH6620 - c = 24650000000000000 - - inputs = ((Timestamp('2011-01-15 12:50:28.502376'), - Timestamp('2011-01-20 12:50:28.593448')), (1 + c, 2 + c)) +@pytest.mark.parametrize("i", [ + (Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448")), + (24650000000000001, 24650000000000002) +]) +def test_groupby_non_arithmetic_agg_int_like_precision(i): + # see gh-6620, gh-9311 + df = pd.DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) - for i in inputs: - df = pd.DataFrame([{'a': 1, 'b': i[0]}, {'a': 1, 'b': i[1]}]) + grp_exp = {"first": {"expected": i[0]}, + "last": {"expected": i[1]}, + "min": {"expected": i[0]}, + "max": {"expected": i[1]}, + "nth": {"expected": i[1], + "args": [1]}, + "count": {"expected": 2}} - grp_exp = {'first': {'expected': i[0]}, - 'last': {'expected': i[1]}, - 'min': {'expected': i[0]}, - 'max': {'expected': i[1]}, - 'nth': {'expected': i[1], - 'args': [1]}, - 'count': {'expected': 2}} + for method, data in compat.iteritems(grp_exp): + if "args" not in data: + data["args"] = [] - for method, data in compat.iteritems(grp_exp): - if 'args' not in data: - data['args'] = [] + grouped = df.groupby("a") + res = getattr(grouped, method)(*data["args"]) - grpd = df.groupby('a') - res = getattr(grpd, method)(*data['args']) - assert res.iloc[0].b == data['expected'] + assert res.iloc[0].b == data["expected"] -def test_fill_constistency(): +def test_fill_consistency(): # GH9221 # pass thru keyword arguments to the generated wrapper diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 57b04bfd82528..aec51afb99ef0 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -451,23 +451,49 @@ def f(group): assert_frame_equal(res, result.loc[key]) -def test_cython_group_transform_algos(): - # GH 4095 - dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, - np.uint64, np.float32, np.float64] +def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): + """ + Check a group transform that executes a cumulative function. + + Parameters + ---------- + pd_op : callable + The pandas cumulative function. + np_op : callable + The analogous one in NumPy. + dtype : type + The specified dtype of the data. + """ + + is_datetimelike = False + + data = np.array([[1], [2], [3], [4]], dtype=dtype) + ans = np.zeros_like(data) - ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]), - (groupby.group_cumsum, np.cumsum, dtypes)] + labels = np.array([0, 0, 0, 0], dtype=np.int64) + pd_op(ans, data, labels, is_datetimelike) + tm.assert_numpy_array_equal(np_op(data), ans[:, 0], + check_dtype=False) + + +def test_cython_group_transform_cumsum(any_real_dtype): + # see gh-4095 + dtype = np.dtype(any_real_dtype).type + pd_op, np_op = groupby.group_cumsum, np.cumsum + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_cumprod(): + # see gh-4095 + dtype = np.float64 + pd_op, np_op = groupby.group_cumprod_float64, np.cumproduct + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_algos(): + # see gh-4095 is_datetimelike = False - for pd_op, np_op, dtypes in ops: - for dtype in dtypes: - data = np.array([[1], [2], [3], [4]], dtype=dtype) - ans = np.zeros_like(data) - labels = np.array([0, 0, 0, 0], dtype=np.int64) - pd_op(ans, data, labels, is_datetimelike) - tm.assert_numpy_array_equal(np_op(data), ans[:, 0], - check_dtype=False) # with nans labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index f4bdb7ba86aaf..bf2308cd8c097 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -157,40 +157,43 @@ def test_dt64ser_cmp_period_scalar(self): expected = Series([x > val for x in ser]) tm.assert_series_equal(result, expected) - def test_timestamp_compare_series(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH#4982 - ser = pd.Series(pd.date_range('20010101', periods=10), name='dates') + @pytest.mark.parametrize("left,right", [ + ("lt", "gt"), + ("le", "ge"), + ("eq", "eq"), + ("ne", "ne"), + ]) + def test_timestamp_compare_series(self, left, right): + # see gh-4982 + # Make sure we can compare Timestamps on the right AND left hand side. + ser = pd.Series(pd.date_range("20010101", periods=10), name="dates") s_nat = ser.copy(deep=True) - ser[0] = pd.Timestamp('nat') - ser[3] = pd.Timestamp('nat') + ser[0] = pd.Timestamp("nat") + ser[3] = pd.Timestamp("nat") - ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + left_f = getattr(operator, left) + right_f = getattr(operator, right) - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - expected = left_f(ser, pd.Timestamp('20010109')) - result = right_f(pd.Timestamp('20010109'), ser) - tm.assert_series_equal(result, expected) + # No NaT + expected = left_f(ser, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), ser) + tm.assert_series_equal(result, expected) - # nats - expected = left_f(ser, pd.Timestamp('nat')) - result = right_f(pd.Timestamp('nat'), ser) - tm.assert_series_equal(result, expected) + # NaT + expected = left_f(ser, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), ser) + tm.assert_series_equal(result, expected) - # compare to timestamp with series containing nats - expected = left_f(s_nat, pd.Timestamp('20010109')) - result = right_f(pd.Timestamp('20010109'), s_nat) - tm.assert_series_equal(result, expected) + # Compare to Timestamp with series containing NaT + expected = left_f(s_nat, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), s_nat) + tm.assert_series_equal(result, expected) - # compare to nat with series containing nats - expected = left_f(s_nat, pd.Timestamp('nat')) - result = right_f(pd.Timestamp('nat'), s_nat) - tm.assert_series_equal(result, expected) + # Compare to NaT with series containing NaT + expected = left_f(s_nat, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), s_nat) + tm.assert_series_equal(result, expected) def test_timestamp_equality(self): # GH#11034 diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index b3330f866ba1f..2790464e2f811 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -14,6 +14,11 @@ import pandas.util.testing as tm +@pytest.fixture(params=["integer", "block"]) +def kind(request): + return request.param + + class TestSparseArray(object): def setup_method(self, method): @@ -128,31 +133,32 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype): assert arr.dtype == dtype assert exp.dtype == dtype - def test_sparseseries_roundtrip(self): - # GH 13999 - for kind in ['integer', 'block']: - for fill in [1, np.nan, 0]: - arr = SparseArray([np.nan, 1, np.nan, 2, 3], kind=kind, - fill_value=fill) - res = SparseArray(SparseSeries(arr)) - tm.assert_sp_array_equal(arr, res) - - arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64, - kind=kind, fill_value=fill) - res = SparseArray(SparseSeries(arr), dtype=np.int64) - tm.assert_sp_array_equal(arr, res) - - res = SparseArray(SparseSeries(arr)) - tm.assert_sp_array_equal(arr, res) - - for fill in [True, False, np.nan]: - arr = SparseArray([True, False, True, True], dtype=np.bool, - kind=kind, fill_value=fill) - res = SparseArray(SparseSeries(arr)) - tm.assert_sp_array_equal(arr, res) - - res = SparseArray(SparseSeries(arr)) - tm.assert_sp_array_equal(arr, res) + @pytest.mark.parametrize("fill", [1, np.nan, 0]) + def test_sparse_series_round_trip(self, kind, fill): + # see gh-13999 + arr = SparseArray([np.nan, 1, np.nan, 2, 3], + kind=kind, fill_value=fill) + res = SparseArray(SparseSeries(arr)) + tm.assert_sp_array_equal(arr, res) + + arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64, + kind=kind, fill_value=fill) + res = SparseArray(SparseSeries(arr), dtype=np.int64) + tm.assert_sp_array_equal(arr, res) + + res = SparseArray(SparseSeries(arr)) + tm.assert_sp_array_equal(arr, res) + + @pytest.mark.parametrize("fill", [True, False, np.nan]) + def test_sparse_series_round_trip2(self, kind, fill): + # see gh-13999 + arr = SparseArray([True, False, True, True], dtype=np.bool, + kind=kind, fill_value=fill) + res = SparseArray(SparseSeries(arr)) + tm.assert_sp_array_equal(arr, res) + + res = SparseArray(SparseSeries(arr)) + tm.assert_sp_array_equal(arr, res) def test_get_item(self): @@ -388,18 +394,16 @@ def test_astype(self): with tm.assert_raises_regex(ValueError, msg): arr.astype('i8') - def test_astype_all(self): + def test_astype_all(self, any_real_dtype): vals = np.array([1, 2, 3]) arr = SparseArray(vals, fill_value=1) + typ = np.dtype(any_real_dtype).type - types = [np.float64, np.float32, np.int64, - np.int32, np.int16, np.int8] - for typ in types: - res = arr.astype(typ) - assert res.dtype == typ - assert res.sp_values.dtype == typ + res = arr.astype(typ) + assert res.dtype == typ + assert res.sp_values.dtype == typ - tm.assert_numpy_array_equal(res.values, vals.astype(typ)) + tm.assert_numpy_array_equal(res.values, vals.astype(typ)) def test_set_fill_value(self): arr = SparseArray([1., np.nan, 2.], fill_value=np.nan) @@ -432,11 +436,13 @@ def test_set_fill_value(self): with tm.assert_raises_regex(ValueError, msg): arr.fill_value = np.nan - # invalid + @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]) + def test_set_fill_invalid_non_scalar(self, val): + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) msg = "fill_value must be a scalar" - for val in [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]: - with tm.assert_raises_regex(ValueError, msg): - arr.fill_value = val + + with tm.assert_raises_regex(ValueError, msg): + arr.fill_value = val def test_copy_shallow(self): arr2 = self.arr.copy(deep=False) @@ -538,9 +544,13 @@ def test_getslice_tuple(self): # check numpy compat dense[4:, :] - def test_binary_operators(self): + @pytest.mark.parametrize("op", ["add", "sub", "mul", + "truediv", "floordiv", "pow"]) + def test_binary_operators(self, op): + op = getattr(operator, op) data1 = np.random.randn(20) data2 = np.random.randn(20) + data1[::2] = np.nan data2[::3] = np.nan @@ -570,7 +580,7 @@ def _check_op(op, first, second): res4 = op(first, 4) assert isinstance(res4, SparseArray) - # ignore this if the actual op raises (e.g. pow) + # Ignore this if the actual op raises (e.g. pow). try: exp = op(first.values, 4) exp_fv = op(first.fill_value, 4) @@ -579,21 +589,26 @@ def _check_op(op, first, second): except ValueError: pass - def _check_inplace_op(op): - tmp = arr1.copy() - pytest.raises(NotImplementedError, op, tmp, arr2) + with np.errstate(all="ignore"): + for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]: + _check_op(op, first_arr, second_arr) - with np.errstate(all='ignore'): - bin_ops = [operator.add, operator.sub, operator.mul, - operator.truediv, operator.floordiv, operator.pow] - for op in bin_ops: - _check_op(op, arr1, arr2) - _check_op(op, farr1, farr2) + @pytest.mark.parametrize("op", ["iadd", "isub", "imul", + "ifloordiv", "ipow", + "itruediv"]) + def test_binary_operators_not_implemented(self, op): + data1 = np.random.randn(20) + data2 = np.random.randn(20) - inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', - 'ipow'] - for op in inplace_ops: - _check_inplace_op(getattr(operator, op)) + data1[::2] = np.nan + data2[::3] = np.nan + + arr1 = SparseArray(data1) + arr2 = SparseArray(data2) + + with np.errstate(all="ignore"): + with pytest.raises(NotImplementedError): + getattr(operator, op)(arr1, arr2) def test_pickle(self): def _check_roundtrip(obj): @@ -810,51 +825,26 @@ def test_numpy_sum(self): tm.assert_raises_regex(ValueError, msg, np.sum, SparseArray(data), out=out) - def test_cumsum(self): - non_null_data = np.array([1, 2, 3, 4, 5], dtype=float) - non_null_expected = SparseArray(non_null_data.cumsum()) - - null_data = np.array([1, 2, np.nan, 4, 5], dtype=float) - null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])) - - for data, expected in [ - (null_data, null_expected), - (non_null_data, non_null_expected) - ]: - out = SparseArray(data).cumsum() - tm.assert_sp_array_equal(out, expected) - - out = SparseArray(data, fill_value=np.nan).cumsum() - tm.assert_sp_array_equal(out, expected) - - out = SparseArray(data, fill_value=2).cumsum() - tm.assert_sp_array_equal(out, expected) - - axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. - msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) - with tm.assert_raises_regex(ValueError, msg): - SparseArray(data).cumsum(axis=axis) - - def test_numpy_cumsum(self): - non_null_data = np.array([1, 2, 3, 4, 5], dtype=float) - non_null_expected = SparseArray(non_null_data.cumsum()) - - null_data = np.array([1, 2, np.nan, 4, 5], dtype=float) - null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])) + @pytest.mark.parametrize("data,expected", [ + (np.array([1, 2, 3, 4, 5], dtype=float), # non-null data + SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0]))), + (np.array([1, 2, np.nan, 4, 5], dtype=float), # null data + SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))) + ]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_cumsum(self, data, expected, numpy): + cumsum = np.cumsum if numpy else lambda s: s.cumsum() - for data, expected in [ - (null_data, null_expected), - (non_null_data, non_null_expected) - ]: - out = np.cumsum(SparseArray(data)) - tm.assert_sp_array_equal(out, expected) + out = cumsum(SparseArray(data)) + tm.assert_sp_array_equal(out, expected) - out = np.cumsum(SparseArray(data, fill_value=np.nan)) - tm.assert_sp_array_equal(out, expected) + out = cumsum(SparseArray(data, fill_value=np.nan)) + tm.assert_sp_array_equal(out, expected) - out = np.cumsum(SparseArray(data, fill_value=2)) - tm.assert_sp_array_equal(out, expected) + out = cumsum(SparseArray(data, fill_value=2)) + tm.assert_sp_array_equal(out, expected) + if numpy: # numpy compatibility checks. msg = "the 'dtype' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.cumsum, SparseArray(data), dtype=np.int64) @@ -862,6 +852,11 @@ def test_numpy_cumsum(self): msg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.cumsum, SparseArray(data), out=out) + else: + axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. + msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) + with tm.assert_raises_regex(ValueError, msg): + SparseArray(data).cumsum(axis=axis) def test_mean(self): data = np.arange(10).astype(float) diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 5321a5a93fa3d..43c7d0951bf6c 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -161,19 +161,15 @@ def test_all_nan(self): expected = pd.Series([np.nan, np.nan, np.nan]) tm.assert_series_equal(res, expected) - def test_type_check(self): + @pytest.mark.parametrize("errors", [None, "ignore", "raise", "coerce"]) + def test_type_check(self, errors): # see gh-11776 df = pd.DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]}) + kwargs = dict(errors=errors) if errors is not None else dict() error_ctx = tm.assert_raises_regex(TypeError, "1-d array") - # Check default parameters. with error_ctx: - to_numeric(df) - - # Check each parameter value for `errors`. - for errors in ["ignore", "raise", "coerce"]: - with error_ctx: - to_numeric(df, errors=errors) + to_numeric(df, **kwargs) def test_scalar(self): assert pd.to_numeric(1) == 1