diff --git a/ci/code_checks.sh b/ci/code_checks.sh index ab44598e04440..f01cd9ba01470 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -205,6 +205,8 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG check_namespace "Series" RET=$(($RET + $?)) + check_namespace "DataFrame" + RET=$(($RET + $?)) echo $MSG "DONE" fi diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index cd6a430829442..5f556718ea0d3 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -722,14 +722,14 @@ def test_timedelta_ops_with_missing_values(self): sn = pd.to_timedelta(Series([pd.NaT], dtype="m8[ns]")) - df1 = pd.DataFrame(["00:00:01"]).apply(pd.to_timedelta) - df2 = pd.DataFrame(["00:00:02"]).apply(pd.to_timedelta) + df1 = DataFrame(["00:00:01"]).apply(pd.to_timedelta) + df2 = DataFrame(["00:00:02"]).apply(pd.to_timedelta) with pytest.raises(TypeError, match=msg): # Passing datetime64-dtype data to TimedeltaIndex is no longer # supported GH#29794 - pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + DataFrame([pd.NaT]).apply(pd.to_timedelta) - dfn = pd.DataFrame([pd.NaT.value]).apply(pd.to_timedelta) + dfn = DataFrame([pd.NaT.value]).apply(pd.to_timedelta) scalar1 = pd.to_timedelta("00:00:01") scalar2 = pd.to_timedelta("00:00:02") diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 2c5846872c341..5796ea52899d2 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -667,7 +667,7 @@ def test_unary_in_array(self): @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_float_comparison_bin_op(self, dtype): # GH 16363 - df = pd.DataFrame({"x": np.array([0], dtype=dtype)}) + df = DataFrame({"x": np.array([0], dtype=dtype)}) res = df.eval("x < -0.1") assert res.values == np.array([False]) @@ -734,7 +734,7 @@ def test_float_truncation(self): expected = np.float64(exp) assert result == expected - df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) + df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) cutoff = 1000000000.0006 result = df.query(f"A < {cutoff:.4f}") assert result.empty @@ -751,12 +751,12 @@ def test_float_truncation(self): def test_disallow_python_keywords(self): # GH 18221 - df = pd.DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"]) + df = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"]) msg = "Python keyword not valid identifier in numexpr query" with pytest.raises(SyntaxError, match=msg): df.query("class == 0") - df = pd.DataFrame() + df = DataFrame() df.index.name = "lambda" with pytest.raises(SyntaxError, match=msg): df.query("lambda == 0") @@ -1366,7 +1366,7 @@ def assignment_not_inplace(self): def test_multi_line_expression(self): # GH 11149 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = df.copy() expected["c"] = expected["a"] + expected["b"] @@ -1403,7 +1403,7 @@ def test_multi_line_expression(self): def test_multi_line_expression_not_inplace(self): # GH 11149 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = df.copy() expected["c"] = expected["a"] + expected["b"] @@ -1428,7 +1428,7 @@ def test_multi_line_expression_not_inplace(self): def test_multi_line_expression_local_variable(self): # GH 15342 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = df.copy() local_var = 7 @@ -1446,7 +1446,7 @@ def test_multi_line_expression_local_variable(self): def test_multi_line_expression_callable_local_variable(self): # 26426 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) def local_func(a, b): return b @@ -1466,7 +1466,7 @@ def local_func(a, b): def test_multi_line_expression_callable_local_variable_with_kwargs(self): # 26426 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) def local_func(a, b): return b @@ -1486,7 +1486,7 @@ def local_func(a, b): def test_assignment_in_query(self): # GH 8664 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() msg = "cannot assign without a target object" with pytest.raises(ValueError, match=msg): @@ -1495,7 +1495,7 @@ def test_assignment_in_query(self): def test_query_inplace(self): # see gh-11149 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = df.copy() expected = expected[expected["a"] == 2] df.query("a == 2", inplace=True) @@ -2052,7 +2052,7 @@ def test_truediv_deprecated(engine, parser): def test_negate_lt_eq_le(engine, parser): - df = pd.DataFrame([[0, 10], [1, 20]], columns=["cat", "count"]) + df = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"]) expected = df[~(df.cat > 0)] result = df.query("~(cat > 0)", engine=engine, parser=parser) diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 58e91c38fc294..1d01aa48e115f 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -358,7 +358,7 @@ def test_apply_reduce_Series(self, float_frame): def test_apply_reduce_rows_to_dict(self): # GH 25196 - data = pd.DataFrame([[1, 2], [3, 4]]) + data = DataFrame([[1, 2], [3, 4]]) expected = Series([{0: 1, 1: 3}, {0: 2, 1: 4}]) result = data.apply(dict) tm.assert_series_equal(result, expected) @@ -445,7 +445,7 @@ def transform2(row): def test_apply_bug(self): # GH 6125 - positions = pd.DataFrame( + positions = DataFrame( [ [1, "ABC0", 50], [1, "YUM0", 20], @@ -619,10 +619,10 @@ def test_applymap(self, float_frame): # GH 8222 empty_frames = [ - pd.DataFrame(), - pd.DataFrame(columns=list("ABC")), - pd.DataFrame(index=list("ABC")), - pd.DataFrame({"A": [], "B": [], "C": []}), + DataFrame(), + DataFrame(columns=list("ABC")), + DataFrame(index=list("ABC")), + DataFrame({"A": [], "B": [], "C": []}), ] for frame in empty_frames: for func in [round, lambda x: x]: @@ -653,11 +653,11 @@ def func(x): return (x.hour, x.day, x.month) # it works! - pd.DataFrame(ser).applymap(func) + DataFrame(ser).applymap(func) def test_applymap_box(self): # ufunc will not be boxed. Same test cases as the test_map_box - df = pd.DataFrame( + df = DataFrame( { "a": [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")], "b": [ @@ -673,7 +673,7 @@ def test_applymap_box(self): ) result = df.applymap(lambda x: type(x).__name__) - expected = pd.DataFrame( + expected = DataFrame( { "a": ["Timestamp", "Timestamp"], "b": ["Timestamp", "Timestamp"], @@ -713,8 +713,8 @@ def test_apply_non_numpy_dtype(self): def test_apply_dup_names_multi_agg(self): # GH 21063 - df = pd.DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) - expected = pd.DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) + expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) result = df.agg(["min"]) tm.assert_frame_equal(result, expected) @@ -724,7 +724,7 @@ def test_apply_nested_result_axis_1(self): def apply_list(row): return [2 * row["A"], 2 * row["C"], 2 * row["B"]] - df = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCD")) + df = DataFrame(np.zeros((4, 4)), columns=list("ABCD")) result = df.apply(apply_list, axis=1) expected = Series( [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] @@ -733,7 +733,7 @@ def apply_list(row): def test_apply_noreduction_tzaware_object(self): # https://github.com/pandas-dev/pandas/issues/31505 - df = pd.DataFrame( + df = DataFrame( {"foo": [pd.Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" ) result = df.apply(lambda x: x) @@ -744,7 +744,7 @@ def test_apply_noreduction_tzaware_object(self): def test_apply_function_runs_once(self): # https://github.com/pandas-dev/pandas/issues/30815 - df = pd.DataFrame({"a": [1, 2, 3]}) + df = DataFrame({"a": [1, 2, 3]}) names = [] # Save row names function is applied to def reducing_function(row): @@ -763,7 +763,7 @@ def non_reducing_function(row): def test_apply_raw_function_runs_once(self): # https://github.com/pandas-dev/pandas/issues/34506 - df = pd.DataFrame({"a": [1, 2, 3]}) + df = DataFrame({"a": [1, 2, 3]}) values = [] # Save row values function is applied to def reducing_function(row): @@ -781,7 +781,7 @@ def non_reducing_function(row): def test_applymap_function_runs_once(self): - df = pd.DataFrame({"a": [1, 2, 3]}) + df = DataFrame({"a": [1, 2, 3]}) values = [] # Save values function is applied to def reducing_function(val): @@ -799,8 +799,8 @@ def non_reducing_function(val): def test_apply_with_byte_string(self): # GH 34529 - df = pd.DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"]) - expected = pd.DataFrame( + df = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"]) + expected = DataFrame( np.array([b"abcd", b"efgh"]), columns=["col"], dtype=object ) # After we make the aply we exect a dataframe just @@ -812,7 +812,7 @@ def test_apply_with_byte_string(self): def test_apply_category_equalness(self, val): # Check if categorical comparisons on apply, GH 21239 df_values = ["asd", None, 12, "asd", "cde", np.NaN] - df = pd.DataFrame({"a": df_values}, dtype="category") + df = DataFrame({"a": df_values}, dtype="category") result = df.a.apply(lambda x: x == val) expected = Series( @@ -829,7 +829,7 @@ class TestInferOutputShape: def test_infer_row_shape(self): # GH 17437 # if row shape is changing, infer it - df = pd.DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.rand(10, 2)) result = df.apply(np.fft.fft, axis=0) assert result.shape == (10, 2) @@ -954,7 +954,7 @@ def test_infer_output_shape_listlike_columns(self): tm.assert_series_equal(result, expected) # GH 17892 - df = pd.DataFrame( + df = DataFrame( { "a": [ pd.Timestamp("2010-02-01"), @@ -1122,7 +1122,7 @@ def test_transform_and_agg_err(self, axis, float_frame): with np.errstate(all="ignore"): float_frame.agg(["max", "sqrt"], axis=axis) - df = pd.DataFrame({"A": range(5), "B": 5}) + df = DataFrame({"A": range(5), "B": 5}) def f(): with np.errstate(all="ignore"): @@ -1130,7 +1130,7 @@ def f(): def test_demo(self): # demonstration tests - df = pd.DataFrame({"A": range(5), "B": 5}) + df = DataFrame({"A": range(5), "B": 5}) result = df.agg(["min", "max"]) expected = DataFrame( @@ -1149,7 +1149,7 @@ def test_demo(self): def test_agg_with_name_as_column_name(self): # GH 36212 - Column name is "name" data = {"name": ["foo", "bar"]} - df = pd.DataFrame(data) + df = DataFrame(data) # result's name should be None result = df.agg({"name": "count"}) @@ -1163,7 +1163,7 @@ def test_agg_with_name_as_column_name(self): def test_agg_multiple_mixed_no_warning(self): # GH 20909 - mdf = pd.DataFrame( + mdf = DataFrame( { "A": [1, 2, 3], "B": [1.0, 2.0, 3.0], @@ -1171,7 +1171,7 @@ def test_agg_multiple_mixed_no_warning(self): "D": pd.date_range("20130101", periods=3), } ) - expected = pd.DataFrame( + expected = DataFrame( { "A": [1, 6], "B": [1.0, 6.0], @@ -1197,7 +1197,7 @@ def test_agg_multiple_mixed_no_warning(self): def test_agg_dict_nested_renaming_depr(self): - df = pd.DataFrame({"A": range(5), "B": 5}) + df = DataFrame({"A": range(5), "B": 5}) # nested renaming msg = r"nested renamer is not supported" @@ -1343,7 +1343,7 @@ def test_non_callable_aggregates(self): result2 = df.agg( {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]} ) - expected = pd.DataFrame( + expected = DataFrame( { "A": {"count": 2, "size": 3}, "B": {"count": 2, "size": 3}, @@ -1480,7 +1480,7 @@ def test_agg_args_kwargs(self, axis, args, kwargs): def f(x, a, b, c=3): return x.sum() + (a + b) / c - df = pd.DataFrame([[1, 2], [3, 4]]) + df = DataFrame([[1, 2], [3, 4]]) if axis == 0: expected = Series([5.0, 7.0]) @@ -1514,7 +1514,7 @@ def test_apply_datetime_tz_issue(self): tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("df", [pd.DataFrame({"A": ["a", None], "B": ["c", "d"]})]) + @pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})]) @pytest.mark.parametrize("method", ["min", "max", "sum"]) def test_consistency_of_aggregates_of_columns_with_missing_values(self, df, method): # GH 16832 @@ -1528,7 +1528,7 @@ def test_consistency_of_aggregates_of_columns_with_missing_values(self, df, meth @pytest.mark.parametrize("col", [1, 1.0, True, "a", np.nan]) def test_apply_dtype(self, col): # GH 31466 - df = pd.DataFrame([[1.0, col]], columns=["a", "b"]) + df = DataFrame([[1.0, col]], columns=["a", "b"]) result = df.apply(lambda x: x.dtype) expected = df.dtypes @@ -1537,7 +1537,7 @@ def test_apply_dtype(self, col): def test_apply_mutating(): # GH#35462 case where applied func pins a new BlockManager to a row - df = pd.DataFrame({"a": range(100), "b": range(100, 200)}) + df = DataFrame({"a": range(100), "b": range(100, 200)}) def func(row): mgr = row._mgr @@ -1556,7 +1556,7 @@ def func(row): def test_apply_empty_list_reduce(): # GH#35683 get columns correct - df = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"]) + df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"]) result = df.apply(lambda x: [], result_type="reduce") expected = Series({"a": [], "b": []}, dtype=object) @@ -1565,9 +1565,9 @@ def test_apply_empty_list_reduce(): def test_apply_no_suffix_index(): # GH36189 - pdf = pd.DataFrame([[4, 9]] * 3, columns=["A", "B"]) + pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) - expected = pd.DataFrame( + expected = DataFrame( {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""] ) @@ -1576,7 +1576,7 @@ def test_apply_no_suffix_index(): def test_apply_raw_returns_string(): # https://github.com/pandas-dev/pandas/issues/35940 - df = pd.DataFrame({"A": ["aa", "bbb"]}) + df = DataFrame({"A": ["aa", "bbb"]}) result = df.apply(lambda x: x[0], axis=1, raw=True) expected = Series(["aa", "bbb"]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index 314de5bdd8146..c876f78176e2e 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -397,7 +397,7 @@ def test_loc_indexing_preserves_index_category_dtype(self): def test_categorical_filtering(self): # GH22609 Verify filtering operations on DataFrames with categorical Series - df = pd.DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"]) + df = DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"]) df["b"] = df.b.astype("category") result = df.where(df.a > 0) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4687d94b52c80..0dee818613edb 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -75,7 +75,7 @@ def test_loc_iterable(self, float_frame, key_type): def test_loc_timedelta_0seconds(self): # GH#10583 - df = pd.DataFrame(np.random.normal(size=(10, 4))) + df = DataFrame(np.random.normal(size=(10, 4))) df.index = pd.timedelta_range(start="0s", periods=10, freq="s") expected = df.loc[pd.Timedelta("0s") :, :] result = df.loc["0s":, :] @@ -200,7 +200,7 @@ def test_setitem_list_of_tuples(self, float_frame): ( ["A", "B", "C", "D"], 7, - pd.DataFrame( + DataFrame( [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], columns=["A", "B", "C", "D"], ), @@ -208,7 +208,7 @@ def test_setitem_list_of_tuples(self, float_frame): ( ["C", "D"], [7, 8], - pd.DataFrame( + DataFrame( [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], columns=["A", "B", "C", "D"], ), @@ -216,14 +216,12 @@ def test_setitem_list_of_tuples(self, float_frame): ( ["A", "B", "C"], np.array([7, 8, 9], dtype=np.int64), - pd.DataFrame( - [[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"] - ), + DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]), ), ( ["B", "C", "D"], [[7, 8, 9], [10, 11, 12], [13, 14, 15]], - pd.DataFrame( + DataFrame( [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], columns=["A", "B", "C", "D"], ), @@ -231,15 +229,15 @@ def test_setitem_list_of_tuples(self, float_frame): ( ["C", "A", "D"], np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), - pd.DataFrame( + DataFrame( [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], columns=["A", "B", "C", "D"], ), ), ( ["A", "C"], - pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), - pd.DataFrame( + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] ), ), @@ -247,7 +245,7 @@ def test_setitem_list_of_tuples(self, float_frame): ) def test_setitem_list_missing_columns(self, columns, box, expected): # GH 29334 - df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) df[columns] = box tm.assert_frame_equal(df, expected) @@ -259,7 +257,7 @@ def test_setitem_multi_index(self): cols = MultiIndex.from_product(it) index = pd.date_range("20141006", periods=20) vals = np.random.randint(1, 1000, (len(index), len(cols))) - df = pd.DataFrame(vals, columns=cols, index=index) + df = DataFrame(vals, columns=cols, index=index) i, j = df.index.values.copy(), it[-1][:] @@ -277,10 +275,10 @@ def test_setitem_multi_index(self): def test_setitem_callable(self): # GH 12533 - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) df[lambda x: "A"] = [11, 12, 13, 14] - exp = pd.DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) + exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) tm.assert_frame_equal(df, exp) def test_setitem_other_callable(self): @@ -288,10 +286,10 @@ def test_setitem_other_callable(self): def inc(x): return x + 1 - df = pd.DataFrame([[-1, 1], [1, -1]]) + df = DataFrame([[-1, 1], [1, -1]]) df[df > 0] = inc - expected = pd.DataFrame([[-1, inc], [inc, -1]]) + expected = DataFrame([[-1, inc], [inc, -1]]) tm.assert_frame_equal(df, expected) def test_getitem_boolean( @@ -440,7 +438,7 @@ def test_getitem_ix_mixed_integer(self): tm.assert_frame_equal(result, expected) # 11320 - df = pd.DataFrame( + df = DataFrame( { "rna": (1.5, 2.2, 3.2, 4.5), -1000: [11, 21, 36, 40], @@ -782,7 +780,7 @@ def test_setitem_None(self, float_frame): def test_setitem_empty(self): # GH 9596 - df = pd.DataFrame( + df = DataFrame( {"a": ["1", "2", "3"], "b": ["11", "22", "33"], "c": ["111", "222", "333"]} ) @@ -804,9 +802,9 @@ def test_setitem_empty_frame_with_boolean(self, dtype, kwargs): def test_setitem_with_empty_listlike(self): # GH #17101 index = pd.Index([], name="idx") - result = pd.DataFrame(columns=["A"], index=index) + result = DataFrame(columns=["A"], index=index) result["A"] = [] - expected = pd.DataFrame(columns=["A"], index=index) + expected = DataFrame(columns=["A"], index=index) tm.assert_index_equal(result.index, expected.index) def test_setitem_scalars_no_index(self): @@ -819,7 +817,7 @@ def test_setitem_scalars_no_index(self): def test_getitem_empty_frame_with_boolean(self): # Test for issue #11859 - df = pd.DataFrame() + df = DataFrame() df2 = df[df > 0] tm.assert_frame_equal(df, df2) @@ -887,11 +885,11 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): def test_setitem_slice_position(self): # GH#31469 - df = pd.DataFrame(np.zeros((100, 1))) + df = DataFrame(np.zeros((100, 1))) df[-4:] = 1 arr = np.zeros((100, 1)) arr[-4:] = 1 - expected = pd.DataFrame(arr) + expected = DataFrame(arr) tm.assert_frame_equal(df, expected) def test_getitem_setitem_non_ix_labels(self): @@ -1190,7 +1188,7 @@ def test_setitem_mixed_datetime(self): ], } ) - df = pd.DataFrame(0, columns=list("ab"), index=range(6)) + df = DataFrame(0, columns=list("ab"), index=range(6)) df["b"] = pd.NaT df.loc[0, "b"] = datetime(2012, 1, 1) df.loc[1, "b"] = 1 @@ -1392,7 +1390,7 @@ def test_lookup_raises(self, float_frame): def test_lookup_requires_unique_axes(self): # GH#33041 raise with a helpful error message - df = pd.DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"]) + df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"]) rows = [0, 1] cols = ["A", "A"] @@ -1481,7 +1479,7 @@ def test_reindex_with_multi_index(self): # 1: 2.0 # 2: 5.0 # 3: 5.8 - df = pd.DataFrame( + df = DataFrame( { "a": [-1] * 7 + [0] * 7 + [1] * 7, "b": list(range(7)) * 3, @@ -1493,13 +1491,13 @@ def test_reindex_with_multi_index(self): # reindexing w/o a `method` value reindexed = df.reindex(new_multi_index) - expected = pd.DataFrame( + expected = DataFrame( {"a": [0] * 4, "b": new_index, "c": [np.nan, "C", "F", np.nan]} ).set_index(["a", "b"]) tm.assert_frame_equal(expected, reindexed) # reindexing with backfilling - expected = pd.DataFrame( + expected = DataFrame( {"a": [0] * 4, "b": new_index, "c": ["B", "C", "F", "G"]} ).set_index(["a", "b"]) reindexed_with_backfilling = df.reindex(new_multi_index, method="bfill") @@ -1509,7 +1507,7 @@ def test_reindex_with_multi_index(self): tm.assert_frame_equal(expected, reindexed_with_backfilling) # reindexing with padding - expected = pd.DataFrame( + expected = DataFrame( {"a": [0] * 4, "b": new_index, "c": ["A", "C", "F", "F"]} ).set_index(["a", "b"]) reindexed_with_padding = df.reindex(new_multi_index, method="pad") @@ -1560,7 +1558,7 @@ def test_single_element_ix_dont_upcast(self, float_frame): assert is_integer(result) # GH 11617 - df = pd.DataFrame(dict(a=[1.23])) + df = DataFrame(dict(a=[1.23])) df["b"] = 666 result = df.loc[0, "b"] @@ -1660,19 +1658,19 @@ def test_loc_duplicates(self): trange = trange.insert(loc=5, item=pd.Timestamp(year=2017, month=1, day=5)) - df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + df = DataFrame(0, index=trange, columns=["A", "B"]) bool_idx = np.array([False, False, False, False, False, True]) # assignment df.loc[trange[bool_idx], "A"] = 6 - expected = pd.DataFrame( + expected = DataFrame( {"A": [0, 0, 0, 0, 6, 6], "B": [0, 0, 0, 0, 0, 0]}, index=trange ) tm.assert_frame_equal(df, expected) # in-place - df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + df = DataFrame(0, index=trange, columns=["A", "B"]) df.loc[trange[bool_idx], "A"] += 6 tm.assert_frame_equal(df, expected) @@ -1685,10 +1683,10 @@ def test_loc_duplicates(self): ], ) def test_reindex_methods(self, method, expected_values): - df = pd.DataFrame({"x": list(range(5))}) + df = DataFrame({"x": list(range(5))}) target = np.array([-0.1, 0.9, 1.1, 1.5]) - expected = pd.DataFrame({"x": expected_values}, index=target) + expected = DataFrame({"x": expected_values}, index=target) actual = df.reindex(target, method=method) tm.assert_frame_equal(expected, actual) @@ -1713,14 +1711,14 @@ def test_reindex_methods(self, method, expected_values): tm.assert_frame_equal(expected, actual) def test_reindex_methods_nearest_special(self): - df = pd.DataFrame({"x": list(range(5))}) + df = DataFrame({"x": list(range(5))}) target = np.array([-0.1, 0.9, 1.1, 1.5]) - expected = pd.DataFrame({"x": [0, 1, 1, np.nan]}, index=target) + expected = DataFrame({"x": [0, 1, 1, np.nan]}, index=target) actual = df.reindex(target, method="nearest", tolerance=0.2) tm.assert_frame_equal(expected, actual) - expected = pd.DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target) + expected = DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target) actual = df.reindex(target, method="nearest", tolerance=[0.5, 0.01, 0.4, 0.1]) tm.assert_frame_equal(expected, actual) @@ -1728,7 +1726,7 @@ def test_reindex_nearest_tz(self, tz_aware_fixture): # GH26683 tz = tz_aware_fixture idx = pd.date_range("2019-01-01", periods=5, tz=tz) - df = pd.DataFrame({"x": list(range(5))}, index=idx) + df = DataFrame({"x": list(range(5))}, index=idx) expected = df.head(3) actual = df.reindex(idx[:3], method="nearest") @@ -1737,8 +1735,8 @@ def test_reindex_nearest_tz(self, tz_aware_fixture): def test_reindex_nearest_tz_empty_frame(self): # https://github.com/pandas-dev/pandas/issues/31964 dti = pd.DatetimeIndex(["2016-06-26 14:27:26+00:00"]) - df = pd.DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"])) - expected = pd.DataFrame(index=dti) + df = DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"])) + expected = DataFrame(index=dti) result = df.reindex(dti, method="nearest") tm.assert_frame_equal(result, expected) @@ -1776,8 +1774,8 @@ def test_set_dataframe_column_ns_dtype(self): def test_non_monotonic_reindex_methods(self): dr = pd.date_range("2013-08-01", periods=6, freq="B") data = np.random.randn(6, 1) - df = pd.DataFrame(data, index=dr, columns=list("A")) - df_rev = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) + df = DataFrame(data, index=dr, columns=list("A")) + df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) # index is not monotonic increasing or decreasing msg = "index must be monotonic increasing or decreasing" with pytest.raises(ValueError, match=msg): @@ -1808,7 +1806,7 @@ def verify(df, level, idx, indexer, check_index_type=True): right = df.iloc[indexer].set_index(icol) tm.assert_frame_equal(left, right, check_index_type=check_index_type) - df = pd.DataFrame( + df = DataFrame( { "jim": list("B" * 4 + "A" * 2 + "C" * 3), "joe": list("abcdeabcd")[::-1], @@ -1886,7 +1884,7 @@ def verify(df, level, idx, indexer, check_index_type=True): verify(df, "joe", ["3rd", "1st"], i) def test_getitem_ix_float_duplicates(self): - df = pd.DataFrame( + df = DataFrame( np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") ) expect = df.iloc[1:] @@ -1902,7 +1900,7 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[1:, 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) - df = pd.DataFrame( + df = DataFrame( np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc") ) expect = df.iloc[1:-1] @@ -1923,17 +1921,17 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): # Assignment of unaligned offset-aware datetime series. # Make sure timezone isn't lost column = Series(pd.date_range("2015-01-01", periods=3, tz="utc"), name="dates") - df = pd.DataFrame({"dates": column}) + df = DataFrame({"dates": column}) df["dates"] = column[[1, 0, 2]] tm.assert_series_equal(df["dates"], column) - df = pd.DataFrame({"dates": column}) + df = DataFrame({"dates": column}) df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] tm.assert_series_equal(df["dates"], column) def test_setitem_datetime_coercion(self): # gh-1048 - df = pd.DataFrame({"c": [pd.Timestamp("2010-10-01")] * 3}) + df = DataFrame({"c": [pd.Timestamp("2010-10-01")] * 3}) df.loc[0:1, "c"] = np.datetime64("2008-08-08") assert pd.Timestamp("2008-08-08") == df.loc[0, "c"] assert pd.Timestamp("2008-08-08") == df.loc[1, "c"] @@ -2140,7 +2138,7 @@ def test_type_error_multiindex(self): def test_interval_index(self): # GH 19977 index = pd.interval_range(start=0, periods=3) - df = pd.DataFrame( + df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] ) @@ -2149,7 +2147,7 @@ def test_interval_index(self): tm.assert_almost_equal(result, expected) index = pd.interval_range(start=0, periods=3, closed="both") - df = pd.DataFrame( + df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] ) @@ -2160,7 +2158,7 @@ def test_interval_index(self): def test_getitem_interval_index_partial_indexing(self): # GH#36490 - df = pd.DataFrame( + df = DataFrame( np.ones((3, 4)), columns=pd.IntervalIndex.from_breaks(np.arange(5)) ) @@ -2218,7 +2216,7 @@ def test_set_reset(self): def test_object_casting_indexing_wraps_datetimelike(): # GH#31649, check the indexing methods all the way down the stack - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2], "B": pd.date_range("2000", periods=2), @@ -2257,7 +2255,7 @@ def test_object_casting_indexing_wraps_datetimelike(): def test_lookup_deprecated(): # GH18262 - df = pd.DataFrame( + df = DataFrame( {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]} ) with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index d114a3178b686..95209c0c35195 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -399,7 +399,7 @@ def test_where_none(self): def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): # see gh-21947 - df = pd.DataFrame(columns=["a"]) + df = DataFrame(columns=["a"]) cond = df assert (cond.dtypes == object).all() diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 8fdaa27144aed..36a57fadff623 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -197,8 +197,8 @@ def test_align_multiindex(self): [range(2), range(3), range(2)], names=("a", "b", "c") ) idx = pd.Index(range(2), name="b") - df1 = pd.DataFrame(np.arange(12, dtype="int64"), index=midx) - df2 = pd.DataFrame(np.arange(2, dtype="int64"), index=idx) + df1 = DataFrame(np.arange(12, dtype="int64"), index=midx) + df2 = DataFrame(np.arange(2, dtype="int64"), index=idx) # these must be the same results (but flipped) res1l, res1r = df1.align(df2, join="left") @@ -207,7 +207,7 @@ def test_align_multiindex(self): expl = df1 tm.assert_frame_equal(expl, res1l) tm.assert_frame_equal(expl, res2r) - expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + expr = DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) tm.assert_frame_equal(expr, res1r) tm.assert_frame_equal(expr, res2l) @@ -217,20 +217,20 @@ def test_align_multiindex(self): exp_idx = pd.MultiIndex.from_product( [range(2), range(2), range(2)], names=("a", "b", "c") ) - expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + expl = DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) tm.assert_frame_equal(expl, res1l) tm.assert_frame_equal(expl, res2r) - expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx) + expr = DataFrame([0, 0, 1, 1] * 2, index=exp_idx) tm.assert_frame_equal(expr, res1r) tm.assert_frame_equal(expr, res2l) def test_align_series_combinations(self): - df = pd.DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + df = DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) s = Series([1, 2, 4], index=list("ABD"), name="x") # frame + series res1, res2 = df.align(s, axis=0) - exp1 = pd.DataFrame( + exp1 = DataFrame( {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, index=list("ABCDE"), ) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index e4c469dd888b4..133e8c03fab3d 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -177,7 +177,7 @@ def test_append_dtypes(self): def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): # GH 30238 tz = tz_naive_fixture - df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)]) + df = DataFrame([pd.Timestamp(timestamp, tz=tz)]) result = df.append(df.iloc[0]).iloc[-1] expected = Series(pd.Timestamp(timestamp, tz=tz), name=0) tm.assert_series_equal(result, expected) @@ -193,7 +193,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): ], ) def test_other_dtypes(self, data, dtype): - df = pd.DataFrame(data, dtype=dtype) + df = DataFrame(data, dtype=dtype) result = df.append(df.iloc[0]).iloc[-1] expected = Series(data, name=0, dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index ca62b56664518..2da6c6e3f0a51 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -1,7 +1,6 @@ import numpy as np import pytest -import pandas as pd from pandas import DataFrame, Series import pandas._testing as tm @@ -100,7 +99,7 @@ def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) - expected = pd.DataFrame(res, columns=original.columns, index=original.index) + expected = DataFrame(res, columns=original.columns, index=original.index) if inplace: result = original tm.assert_frame_equal(result, expected, check_exact=True) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 78f265d32f8df..d1f38d90547fd 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -18,7 +18,7 @@ def test_combine_first_mixed(self): b = Series(range(2), index=range(5, 7)) g = DataFrame({"A": a, "B": b}) - exp = pd.DataFrame( + exp = DataFrame( {"A": list("abab"), "B": [0.0, 1.0, 0.0, 1.0]}, index=[0, 1, 5, 6] ) combined = f.combine_first(g) @@ -169,13 +169,13 @@ def test_combine_first_mixed_bug(self): def test_combine_first_align_nan(self): # GH 7509 (not fixed) - dfa = pd.DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) - dfb = pd.DataFrame([[4], [5]], columns=["b"]) + dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) + dfb = DataFrame([[4], [5]], columns=["b"]) assert dfa["a"].dtype == "datetime64[ns]" assert dfa["b"].dtype == "int64" res = dfa.combine_first(dfb) - exp = pd.DataFrame( + exp = DataFrame( {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2.0, 5.0]}, columns=["a", "b"], ) @@ -185,7 +185,7 @@ def test_combine_first_align_nan(self): assert res["b"].dtype == "float64" res = dfa.iloc[:0].combine_first(dfb) - exp = pd.DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) + exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) tm.assert_frame_equal(res, exp) # ToDo: this must be datetime64 assert res["a"].dtype == "float64" @@ -195,21 +195,21 @@ def test_combine_first_align_nan(self): def test_combine_first_timezone(self): # see gh-7630 data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") - df1 = pd.DataFrame( + df1 = DataFrame( columns=["UTCdatetime", "abc"], data=data1, index=pd.date_range("20140627", periods=1), dtype="object", ) data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") - df2 = pd.DataFrame( + df2 = DataFrame( columns=["UTCdatetime", "xyz"], data=data2, index=pd.date_range("20140628", periods=1), dtype="object", ) res = df2[["UTCdatetime"]].combine_first(df1) - exp = pd.DataFrame( + exp = DataFrame( { "UTCdatetime": [ pd.Timestamp("2010-01-01 01:01", tz="UTC"), @@ -230,9 +230,9 @@ def test_combine_first_timezone(self): # see gh-10567 dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") - df1 = pd.DataFrame({"DATE": dts1}) + df1 = DataFrame({"DATE": dts1}) dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") - df2 = pd.DataFrame({"DATE": dts2}) + df2 = DataFrame({"DATE": dts2}) res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) @@ -241,11 +241,11 @@ def test_combine_first_timezone(self): dts1 = pd.DatetimeIndex( ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" ) - df1 = pd.DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) + df1 = DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) dts2 = pd.DatetimeIndex( ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" ) - df2 = pd.DataFrame({"DATE": dts2}, index=[2, 4, 5]) + df2 = DataFrame({"DATE": dts2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = pd.DatetimeIndex( @@ -259,14 +259,14 @@ def test_combine_first_timezone(self): ], tz="US/Eastern", ) - exp = pd.DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + exp = DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) # different tz dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") - df1 = pd.DataFrame({"DATE": dts1}) + df1 = DataFrame({"DATE": dts1}) dts2 = pd.date_range("2015-01-03", "2015-01-05") - df2 = pd.DataFrame({"DATE": dts2}) + df2 = DataFrame({"DATE": dts2}) # if df1 doesn't have NaN, keep its dtype res = df1.combine_first(df2) @@ -274,9 +274,9 @@ def test_combine_first_timezone(self): assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") - df1 = pd.DataFrame({"DATE": dts1}) + df1 = DataFrame({"DATE": dts1}) dts2 = pd.date_range("2015-01-01", "2015-01-03") - df2 = pd.DataFrame({"DATE": dts2}) + df2 = DataFrame({"DATE": dts2}) res = df1.combine_first(df2) exp_dts = [ @@ -284,41 +284,41 @@ def test_combine_first_timezone(self): pd.Timestamp("2015-01-02", tz="US/Eastern"), pd.Timestamp("2015-01-03"), ] - exp = pd.DataFrame({"DATE": exp_dts}) + exp = DataFrame({"DATE": exp_dts}) tm.assert_frame_equal(res, exp) assert res["DATE"].dtype == "object" def test_combine_first_timedelta(self): data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) - df1 = pd.DataFrame({"TD": data1}, index=[1, 3, 5, 7]) + df1 = DataFrame({"TD": data1}, index=[1, 3, 5, 7]) data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) - df2 = pd.DataFrame({"TD": data2}, index=[2, 4, 5]) + df2 = DataFrame({"TD": data2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = pd.TimedeltaIndex( ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] ) - exp = pd.DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + exp = DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) assert res["TD"].dtype == "timedelta64[ns]" def test_combine_first_period(self): data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") - df1 = pd.DataFrame({"P": data1}, index=[1, 3, 5, 7]) + df1 = DataFrame({"P": data1}, index=[1, 3, 5, 7]) data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") - df2 = pd.DataFrame({"P": data2}, index=[2, 4, 5]) + df2 = DataFrame({"P": data2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = pd.PeriodIndex( ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" ) - exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) assert res["P"].dtype == data1.dtype # different freq dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") - df2 = pd.DataFrame({"P": dts2}, index=[2, 4, 5]) + df2 = DataFrame({"P": dts2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = [ @@ -329,15 +329,15 @@ def test_combine_first_period(self): pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M"), ] - exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) assert res["P"].dtype == "object" def test_combine_first_int(self): # GH14687 - integer series that do no align exactly - df1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") - df2 = pd.DataFrame({"a": [1, 4]}, dtype="int64") + df1 = DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") + df2 = DataFrame({"a": [1, 4]}, dtype="int64") res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) @@ -346,10 +346,10 @@ def test_combine_first_int(self): @pytest.mark.parametrize("val", [1, 1.0]) def test_combine_first_with_asymmetric_other(self, val): # see gh-20699 - df1 = pd.DataFrame({"isNum": [val]}) - df2 = pd.DataFrame({"isBool": [True]}) + df1 = DataFrame({"isNum": [val]}) + df2 = DataFrame({"isBool": [True]}) res = df1.combine_first(df2) - exp = pd.DataFrame({"isBool": [True], "isNum": [val]}) + exp = DataFrame({"isBool": [True], "isNum": [val]}) tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 87c9dc32650c0..7eeeb245534f5 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -74,10 +74,10 @@ def test_cov_ddof(self, test_ddof): ) def test_cov_nullable_integer(self, other_column): # https://github.com/pandas-dev/pandas/issues/33803 - data = pd.DataFrame({"a": pd.array([1, 2, None]), "b": other_column}) + data = DataFrame({"a": pd.array([1, 2, None]), "b": other_column}) result = data.cov() arr = np.array([[0.5, 0.5], [0.5, 1.0]]) - expected = pd.DataFrame(arr, columns=["a", "b"], index=["a", "b"]) + expected = DataFrame(arr, columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -155,7 +155,7 @@ def test_corr_int_and_boolean(self): def test_corr_cov_independent_index_column(self): # GH#14617 - df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) + df = DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) for method in ["cov", "corr"]: result = getattr(df, method)() assert result.index is not result.columns @@ -163,7 +163,7 @@ def test_corr_cov_independent_index_column(self): def test_corr_invalid_method(self): # GH#22298 - df = pd.DataFrame(np.random.normal(size=(10, 2))) + df = DataFrame(np.random.normal(size=(10, 2))) msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " with pytest.raises(ValueError, match=msg): df.corr(method="____") @@ -186,15 +186,15 @@ def test_corr_int(self): @pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"]) def test_corr_nullable_integer(self, nullable_column, other_column, method): # https://github.com/pandas-dev/pandas/issues/33803 - data = pd.DataFrame({"a": nullable_column, "b": other_column}) + data = DataFrame({"a": nullable_column, "b": other_column}) result = data.corr(method=method) - expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) + expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) def test_corr_item_cache(self): # Check that corr does not lead to incorrect entries in item_cache - df = pd.DataFrame({"A": range(10)}) + df = DataFrame({"A": range(10)}) df["B"] = range(10)[::-1] ser = df["A"] # populate item_cache @@ -275,7 +275,7 @@ def test_corrwith_matches_corrcoef(self): def test_corrwith_mixed_dtypes(self): # GH#18570 - df = pd.DataFrame( + df = DataFrame( {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]} ) s = Series([0, 6, 7, 3]) @@ -285,16 +285,16 @@ def test_corrwith_mixed_dtypes(self): tm.assert_series_equal(result, expected) def test_corrwith_index_intersection(self): - df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) - df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) result = df1.corrwith(df2, drop=True).index.sort_values() expected = df1.columns.intersection(df2.columns).sort_values() tm.assert_index_equal(result, expected) def test_corrwith_index_union(self): - df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) - df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) result = df1.corrwith(df2, drop=False).index.sort_values() expected = df1.columns.union(df2.columns).sort_values() @@ -302,7 +302,7 @@ def test_corrwith_index_union(self): def test_corrwith_dup_cols(self): # GH#21925 - df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T) + df1 = DataFrame(np.vstack([np.arange(10)] * 3).T) df2 = df1.copy() df2 = pd.concat((df2, df2[0]), axis=1) @@ -313,7 +313,7 @@ def test_corrwith_dup_cols(self): @td.skip_if_no_scipy def test_corrwith_spearman(self): # GH#21925 - df = pd.DataFrame(np.random.random(size=(100, 3))) + df = DataFrame(np.random.random(size=(100, 3))) result = df.corrwith(df ** 2, method="spearman") expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) @@ -321,7 +321,7 @@ def test_corrwith_spearman(self): @td.skip_if_no_scipy def test_corrwith_kendall(self): # GH#21925 - df = pd.DataFrame(np.random.random(size=(100, 3))) + df = DataFrame(np.random.random(size=(100, 3))) result = df.corrwith(df ** 2, method="kendall") expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index d10d4c8ea05ab..0358bc3c04539 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -34,9 +34,9 @@ def test_describe_bool_in_mixed_frame(self): def test_describe_empty_object(self): # GH#27183 - df = pd.DataFrame({"A": [None, None]}, dtype=object) + df = DataFrame({"A": [None, None]}, dtype=object) result = df.describe() - expected = pd.DataFrame( + expected = DataFrame( {"A": [0, 0, np.nan, np.nan]}, dtype=object, index=["count", "unique", "top", "freq"], @@ -48,7 +48,7 @@ def test_describe_empty_object(self): def test_describe_bool_frame(self): # GH#13891 - df = pd.DataFrame( + df = DataFrame( { "bool_data_1": [False, False, True, True], "bool_data_2": [False, True, True, True], @@ -61,7 +61,7 @@ def test_describe_bool_frame(self): ) tm.assert_frame_equal(result, expected) - df = pd.DataFrame( + df = DataFrame( { "bool_data": [False, False, True, True, False], "int_data": [0, 1, 2, 3, 4], @@ -74,7 +74,7 @@ def test_describe_bool_frame(self): ) tm.assert_frame_equal(result, expected) - df = pd.DataFrame( + df = DataFrame( {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]} ) result = df.describe() @@ -119,7 +119,7 @@ def test_describe_empty_categorical_column(self): # GH#26397 # Ensure the index of an an empty categorical DataFrame column # also contains (count, unique, top, freq) - df = pd.DataFrame({"empty_col": Categorical([])}) + df = DataFrame({"empty_col": Categorical([])}) result = df.describe() expected = DataFrame( {"empty_col": [0, 0, np.nan, np.nan]}, @@ -198,7 +198,7 @@ def test_describe_timedelta_values(self): # GH#6145 t1 = pd.timedelta_range("1 days", freq="D", periods=5) t2 = pd.timedelta_range("1 hours", freq="H", periods=5) - df = pd.DataFrame({"t1": t1, "t2": t2}) + df = DataFrame({"t1": t1, "t2": t2}) expected = DataFrame( { @@ -249,7 +249,7 @@ def test_describe_tz_values(self, tz_naive_fixture): start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) s2 = Series(date_range(start, end, tz=tz)) - df = pd.DataFrame({"s1": s1, "s2": s2}) + df = DataFrame({"s1": s1, "s2": s2}) expected = DataFrame( { @@ -271,9 +271,9 @@ def test_describe_tz_values(self, tz_naive_fixture): tm.assert_frame_equal(result, expected) def test_datetime_is_numeric_includes_datetime(self): - df = pd.DataFrame({"a": pd.date_range("2012", periods=3), "b": [1, 2, 3]}) + df = DataFrame({"a": pd.date_range("2012", periods=3), "b": [1, 2, 3]}) result = df.describe(datetime_is_numeric=True) - expected = pd.DataFrame( + expected = DataFrame( { "a": [ 3, @@ -297,7 +297,7 @@ def test_describe_tz_values2(self): start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) s2 = Series(date_range(start, end, tz=tz)) - df = pd.DataFrame({"s1": s1, "s2": s2}) + df = DataFrame({"s1": s1, "s2": s2}) s1_ = s1.describe() s2_ = Series( @@ -334,7 +334,7 @@ def test_describe_tz_values2(self): def test_describe_percentiles_integer_idx(self): # GH#26660 - df = pd.DataFrame({"x": [1]}) + df = DataFrame({"x": [1]}) pct = np.linspace(0, 1, 10 + 1) result = df.describe(percentiles=pct) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 9ef6ba5f410a9..8affcce478cf4 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -8,7 +8,7 @@ class TestDataFrameDiff: def test_diff_requires_integer(self): - df = pd.DataFrame(np.random.randn(2, 2)) + df = DataFrame(np.random.randn(2, 2)) with pytest.raises(ValueError, match="periods must be an integer"): df.diff(1.5) @@ -33,10 +33,10 @@ def test_diff(self, datetime_frame): tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) # GH#10907 - df = pd.DataFrame({"y": Series([2]), "z": Series([3])}) + df = DataFrame({"y": Series([2]), "z": Series([3])}) df.insert(0, "x", 1) result = df.diff(axis=1) - expected = pd.DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)}) + expected = DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)}) tm.assert_frame_equal(result, expected) def test_diff_timedelta64_with_nat(self): @@ -44,12 +44,10 @@ def test_diff_timedelta64_with_nat(self): arr = np.arange(6).reshape(3, 2).astype("timedelta64[ns]") arr[:, 0] = np.timedelta64("NaT", "ns") - df = pd.DataFrame(arr) + df = DataFrame(arr) result = df.diff(1, axis=0) - expected = pd.DataFrame( - {0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]} - ) + expected = DataFrame({0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]}) tm.assert_equal(result, expected) result = df.diff(0) @@ -176,7 +174,7 @@ def test_diff_axis(self): def test_diff_period(self): # GH#32995 Don't pass an incorrect axis pi = pd.date_range("2016-01-01", periods=3).to_period("D") - df = pd.DataFrame({"A": pi}) + df = DataFrame({"A": pi}) result = df.diff(1, axis=1) @@ -185,24 +183,24 @@ def test_diff_period(self): def test_diff_axis1_mixed_dtypes(self): # GH#32995 operate column-wise when we have mixed dtypes and axis=1 - df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) - expected = pd.DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2}) + expected = DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2}) result = df.diff(axis=1) tm.assert_frame_equal(result, expected) # GH#21437 mixed-float-dtypes - df = pd.DataFrame( + df = DataFrame( {"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")} ) result = df.diff(axis=1) - expected = pd.DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0}) + expected = DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0}) tm.assert_frame_equal(result, expected) def test_diff_axis1_mixed_dtypes_large_periods(self): # GH#32995 operate column-wise when we have mixed dtypes and axis=1 - df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) expected = df * np.nan @@ -211,19 +209,19 @@ def test_diff_axis1_mixed_dtypes_large_periods(self): def test_diff_axis1_mixed_dtypes_negative_periods(self): # GH#32995 operate column-wise when we have mixed dtypes and axis=1 - df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) - expected = pd.DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan}) + expected = DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan}) result = df.diff(axis=1, periods=-1) tm.assert_frame_equal(result, expected) def test_diff_sparse(self): # GH#28813 .diff() should work for sparse dataframes as well - sparse_df = pd.DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]") + sparse_df = DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]") result = sparse_df.diff() - expected = pd.DataFrame( + expected = DataFrame( [[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0) ) @@ -234,7 +232,7 @@ def test_diff_sparse(self): [ ( 0, - pd.DataFrame( + DataFrame( { "a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0], "b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan], @@ -246,7 +244,7 @@ def test_diff_sparse(self): ), ( 1, - pd.DataFrame( + DataFrame( { "a": np.repeat(np.nan, 8), "b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0], @@ -260,7 +258,7 @@ def test_diff_sparse(self): ) def test_diff_integer_na(self, axis, expected): # GH#24171 IntegerNA Support for DataFrame.diff() - df = pd.DataFrame( + df = DataFrame( { "a": np.repeat([0, 1, np.nan, 2], 2), "b": np.tile([0, 1, np.nan, 2], 2), @@ -278,7 +276,7 @@ def test_diff_readonly(self): # https://github.com/pandas-dev/pandas/issues/35559 arr = np.random.randn(5, 2) arr.flags.writeable = False - df = pd.DataFrame(arr) + df = DataFrame(arr) result = df.diff() - expected = pd.DataFrame(np.array(df)).diff() + expected = DataFrame(np.array(df)).diff() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index da369658078a0..c45d774b3bb9e 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -21,7 +21,7 @@ def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level): # GH 8594 mi = pd.MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) s = pd.Series([10, 20, 30], index=mi) - df = pd.DataFrame([10, 20, 30], index=mi) + df = DataFrame([10, 20, 30], index=mi) with pytest.raises(KeyError, match=msg): s.drop(labels, level=level) @@ -34,7 +34,7 @@ def test_drop_errors_ignore(labels, level): # GH 8594 mi = pd.MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) s = pd.Series([10, 20, 30], index=mi) - df = pd.DataFrame([10, 20, 30], index=mi) + df = DataFrame([10, 20, 30], index=mi) expected_s = s.drop(labels, level=level, errors="ignore") tm.assert_series_equal(s, expected_s) @@ -47,7 +47,7 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): # GH 30399 # define dataframe with unique datetime index - df = pd.DataFrame( + df = DataFrame( np.random.randn(5, 3), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), @@ -148,7 +148,7 @@ def test_drop(self): # inplace cache issue # GH#5628 - df = pd.DataFrame(np.random.randn(10, 3), columns=list("abc")) + df = DataFrame(np.random.randn(10, 3), columns=list("abc")) expected = df[~(df.b > 0)] return_value = df.drop(labels=df[df.b > 0].index, inplace=True) assert return_value is None @@ -252,15 +252,15 @@ def test_raise_on_drop_duplicate_index(self, actual): def test_drop_empty_list(self, index, drop_labels): # GH#21494 expected_index = [i for i in index if i not in drop_labels] - frame = pd.DataFrame(index=index).drop(drop_labels) - tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + frame = DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, DataFrame(index=expected_index)) @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]]) @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]]) def test_drop_non_empty_list(self, index, drop_labels): # GH# 21494 with pytest.raises(KeyError, match="not found in axis"): - pd.DataFrame(index=index).drop(drop_labels) + DataFrame(index=index).drop(drop_labels) def test_mixed_depth_drop(self): arrays = [ @@ -427,7 +427,7 @@ def test_drop_preserve_names(self): @pytest.mark.parametrize("inplace", [False, True]) def test_inplace_drop_and_operation(self, operation, inplace): # GH#30484 - df = pd.DataFrame({"x": range(5)}) + df = DataFrame({"x": range(5)}) expected = df.copy() df["y"] = range(5) y = df["y"] diff --git a/pandas/tests/frame/methods/test_filter.py b/pandas/tests/frame/methods/test_filter.py index 569b2fe21d1c2..af77db4058b43 100644 --- a/pandas/tests/frame/methods/test_filter.py +++ b/pandas/tests/frame/methods/test_filter.py @@ -133,7 +133,7 @@ def test_filter_corner(self): def test_filter_regex_non_string(self): # GH#5798 trying to filter on non-string columns should drop, # not raise - df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) + df = DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) result = df.filter(regex="STRING") expected = df[["STRING"]] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index fb3fbacaf2627..5e50e63016f26 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -87,7 +87,7 @@ def test_isin_df(self): def test_isin_tuples(self): # GH#16394 - df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "f"]}) + df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "f"]}) df["C"] = list(zip(df["A"], df["B"])) result = df["C"].isin([(1, "a")]) tm.assert_series_equal(result, Series([True, False, False], name="C")) @@ -124,7 +124,7 @@ def test_isin_dupe_self(self): tm.assert_frame_equal(result, expected) def test_isin_against_series(self): - df = pd.DataFrame( + df = DataFrame( {"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}, index=["a", "b", "c", "d"] ) s = Series([1, 3, 11, 4], index=["a", "b", "c", "d"]) @@ -193,13 +193,13 @@ def test_isin_empty_datetimelike(self): @pytest.mark.parametrize( "values", [ - pd.DataFrame({"a": [1, 2, 3]}, dtype="category"), + DataFrame({"a": [1, 2, 3]}, dtype="category"), Series([1, 2, 3], dtype="category"), ], ) def test_isin_category_frame(self, values): # GH#34256 - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = DataFrame({"a": [True, True, True], "b": [False, False, False]}) result = df.isin(values) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 80e57b9d71a85..5cdd65b8cf6e2 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -11,7 +11,7 @@ class TestDataFrameQuantile: "df,expected", [ [ - pd.DataFrame( + DataFrame( { 0: Series(pd.arrays.SparseArray([1, 2])), 1: Series(pd.arrays.SparseArray([3, 4])), @@ -20,7 +20,7 @@ class TestDataFrameQuantile: Series([1.5, 3.5], name=0.5), ], [ - pd.DataFrame(Series([0.0, None, 1.0, 2.0], dtype="Sparse[float]")), + DataFrame(Series([0.0, None, 1.0, 2.0], dtype="Sparse[float]")), Series([1.0], name=0.5), ], ], @@ -79,7 +79,7 @@ def test_quantile_date_range(self): dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") ser = Series(dti) - df = pd.DataFrame(ser) + df = DataFrame(ser) result = df.quantile(numeric_only=False) expected = Series( @@ -319,7 +319,7 @@ def test_quantile_box(self): tm.assert_series_equal(res, exp) res = df.quantile([0.5], numeric_only=False) - exp = pd.DataFrame( + exp = DataFrame( [ [ pd.Timestamp("2011-01-02"), @@ -391,7 +391,7 @@ def test_quantile_box(self): tm.assert_series_equal(res, exp) res = df.quantile([0.5], numeric_only=False) - exp = pd.DataFrame( + exp = DataFrame( [ [ pd.Timestamp("2011-01-02"), @@ -506,7 +506,7 @@ def test_quantile_empty_no_rows(self): def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns - df = pd.DataFrame(pd.date_range("1/1/18", periods=5)) + df = DataFrame(pd.date_range("1/1/18", periods=5)) df.columns.name = "captain tightpants" result = df.quantile(0.5) expected = Series([], index=[], name=0.5, dtype=np.float64) @@ -514,6 +514,6 @@ def test_quantile_empty_no_columns(self): tm.assert_series_equal(result, expected) result = df.quantile([0.5]) - expected = pd.DataFrame([], index=[0.5], columns=[]) + expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 99a3bbdf5ffe3..99494191c043a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -76,7 +76,7 @@ def test_reindex(self, float_frame): assert result is not float_frame def test_reindex_nan(self): - df = pd.DataFrame( + df = DataFrame( [[1, 2], [3, 5], [7, 11], [9, 23]], index=[2, np.nan, 1, 5], columns=["joe", "jim"], @@ -89,7 +89,7 @@ def test_reindex_nan(self): tm.assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False) # GH10388 - df = pd.DataFrame( + df = DataFrame( { "other": ["a", "b", np.nan, "c"], "date": ["2015-03-22", np.nan, "2012-01-08", np.nan], @@ -263,8 +263,8 @@ def test_reindex_dups(self): def test_reindex_axis_style(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - expected = pd.DataFrame( + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = DataFrame( {"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, index=[0, 1, 3] ) result = df.reindex([0, 1, 3]) @@ -278,8 +278,8 @@ def test_reindex_axis_style(self): def test_reindex_positional_warns(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - expected = pd.DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]}) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]}) with tm.assert_produces_warning(FutureWarning): result = df.reindex([0, 1], ["A", "B", "C"]) @@ -287,7 +287,7 @@ def test_reindex_positional_warns(self): def test_reindex_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex([0, 1], ["A"], axis=1) @@ -322,9 +322,9 @@ def test_reindex_axis_style_raises(self): def test_reindex_single_named_indexer(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) result = df.reindex([0, 1], columns=["A"]) - expected = pd.DataFrame({"A": [1, 2]}) + expected = DataFrame({"A": [1, 2]}) tm.assert_frame_equal(result, expected) def test_reindex_api_equivalence(self): @@ -444,9 +444,9 @@ def test_reindex_multi_categorical_time(self): Categorical(date_range("2012-01-01", periods=3, freq="H")), ] ) - df = pd.DataFrame({"a": range(len(midx))}, index=midx) + df = DataFrame({"a": range(len(midx))}, index=midx) df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]] result = df2.reindex(midx) - expected = pd.DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) + expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 569677f1fec5e..2c909ab2f8227 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -553,13 +553,13 @@ def test_regex_replace_dict_nested(self, mix_abc): def test_regex_replace_dict_nested_non_first_character(self): # GH 25259 - df = pd.DataFrame({"first": ["abc", "bca", "cab"]}) - expected = pd.DataFrame({"first": [".bc", "bc.", "c.b"]}) + df = DataFrame({"first": ["abc", "bca", "cab"]}) + expected = DataFrame({"first": [".bc", "bc.", "c.b"]}) result = df.replace({"a": "."}, regex=True) tm.assert_frame_equal(result, expected) def test_regex_replace_dict_nested_gh4115(self): - df = pd.DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) + df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) result = df.replace({"Type": {"Q": 0, "T": 1}}) tm.assert_frame_equal(result, expected) @@ -669,11 +669,11 @@ def test_replace(self, datetime_frame): # GH 11698 # test for mixed data types. - df = pd.DataFrame( + df = DataFrame( [("-", pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] ) df1 = df.replace("-", np.nan) - expected_df = pd.DataFrame( + expected_df = DataFrame( [(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] ) tm.assert_frame_equal(df1, expected_df) @@ -712,7 +712,7 @@ def test_replace_list(self): def test_replace_with_empty_list(self): # GH 21977 s = Series([["a", "b"], [], np.nan, [1]]) - df = pd.DataFrame({"col": s}) + df = DataFrame({"col": s}) expected = df result = df.replace([], np.nan) tm.assert_frame_equal(result, expected) @@ -1162,7 +1162,7 @@ def test_replace_with_dict_with_bool_keys(self): def test_replace_dict_strings_vs_ints(self): # GH#34789 - df = pd.DataFrame({"Y0": [1, 2], "Y1": [3, 4]}) + df = DataFrame({"Y0": [1, 2], "Y1": [3, 4]}) result = df.replace({"replace_string": "test"}) tm.assert_frame_equal(result, df) @@ -1196,14 +1196,14 @@ def test_nested_dict_overlapping_keys_replace_str(self): tm.assert_frame_equal(result, expected) def test_replace_swapping_bug(self): - df = pd.DataFrame({"a": [True, False, True]}) + df = DataFrame({"a": [True, False, True]}) res = df.replace({"a": {True: "Y", False: "N"}}) - expect = pd.DataFrame({"a": ["Y", "N", "Y"]}) + expect = DataFrame({"a": ["Y", "N", "Y"]}) tm.assert_frame_equal(res, expect) - df = pd.DataFrame({"a": [0, 1, 0]}) + df = DataFrame({"a": [0, 1, 0]}) res = df.replace({"a": {0: "Y", 1: "N"}}) - expect = pd.DataFrame({"a": ["Y", "N", "Y"]}) + expect = DataFrame({"a": ["Y", "N", "Y"]}) tm.assert_frame_equal(res, expect) def test_replace_period(self): @@ -1221,7 +1221,7 @@ def test_replace_period(self): } } - df = pd.DataFrame( + df = DataFrame( [ "out_augmented_AUG_2012.json", "out_augmented_SEP_2013.json", @@ -1255,7 +1255,7 @@ def test_replace_datetime(self): } } - df = pd.DataFrame( + df = DataFrame( [ "out_augmented_AUG_2012.json", "out_augmented_SEP_2013.json", @@ -1453,9 +1453,9 @@ def test_replace_commutative(self, df, to_replace, exp): # DataFrame.replace() overwrites when values are non-numeric # also added to data frame whilst issue was for series - df = pd.DataFrame(df) + df = DataFrame(df) - expected = pd.DataFrame(exp) + expected = DataFrame(exp) result = df.replace(to_replace) tm.assert_frame_equal(result, expected) @@ -1471,22 +1471,22 @@ def test_replace_commutative(self, df, to_replace, exp): ) def test_replace_replacer_dtype(self, replacer): # GH26632 - df = pd.DataFrame(["a"]) + df = DataFrame(["a"]) result = df.replace({"a": replacer, "b": replacer}) - expected = pd.DataFrame([replacer]) + expected = DataFrame([replacer]) tm.assert_frame_equal(result, expected) def test_replace_after_convert_dtypes(self): # GH31517 - df = pd.DataFrame({"grp": [1, 2, 3, 4, 5]}, dtype="Int64") + df = DataFrame({"grp": [1, 2, 3, 4, 5]}, dtype="Int64") result = df.replace(1, 10) - expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") + expected = DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") tm.assert_frame_equal(result, expected) def test_replace_invalid_to_replace(self): # GH 18634 # API: replace() should raise an exception if invalid argument is given - df = pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) + df = DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) msg = ( r"Expecting 'to_replace' to be either a scalar, array-like, " r"dict or None, got invalid type.*" @@ -1498,17 +1498,17 @@ def test_replace_invalid_to_replace(self): @pytest.mark.parametrize("value", [np.nan, pd.NA]) def test_replace_no_replacement_dtypes(self, dtype, value): # https://github.com/pandas-dev/pandas/issues/32988 - df = pd.DataFrame(np.eye(2), dtype=dtype) + df = DataFrame(np.eye(2), dtype=dtype) result = df.replace(to_replace=[None, -np.inf, np.inf], value=value) tm.assert_frame_equal(result, df) @pytest.mark.parametrize("replacement", [np.nan, 5]) def test_replace_with_duplicate_columns(self, replacement): # GH 24798 - result = pd.DataFrame({"A": [1, 2, 3], "A1": [4, 5, 6], "B": [7, 8, 9]}) + result = DataFrame({"A": [1, 2, 3], "A1": [4, 5, 6], "B": [7, 8, 9]}) result.columns = list("AAB") - expected = pd.DataFrame( + expected = DataFrame( {"A": [1, 2, 3], "A1": [4, 5, 6], "B": [replacement, 8, 9]} ) expected.columns = list("AAB") @@ -1525,9 +1525,9 @@ def test_replace_period_ignore_float(self): Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df with a Period column the old, faulty behavior is to raise TypeError. """ - df = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + df = DataFrame({"Per": [pd.Period("2020-01")] * 3}) result = df.replace(1.0, 0.0) - expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + expected = DataFrame({"Per": [pd.Period("2020-01")] * 3}) tm.assert_frame_equal(expected, result) def test_replace_value_category_type(self): @@ -1545,7 +1545,7 @@ def test_replace_value_category_type(self): "col5": ["obj1", "obj2", "obj3", "obj4"], } # explicitly cast columns as category and order them - input_df = pd.DataFrame(data=input_dict).astype( + input_df = DataFrame(data=input_dict).astype( {"col2": "category", "col4": "category"} ) input_df["col2"] = input_df["col2"].cat.reorder_categories( @@ -1564,7 +1564,7 @@ def test_replace_value_category_type(self): "col5": ["obj9", "obj2", "obj3", "obj4"], } # explicitly cast columns as category and order them - expected = pd.DataFrame(data=expected_dict).astype( + expected = DataFrame(data=expected_dict).astype( {"col2": "category", "col4": "category"} ) expected["col2"] = expected["col2"].cat.reorder_categories( @@ -1594,14 +1594,14 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d # create input dataframe input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]} # explicitly cast columns as category - input_df = pd.DataFrame(data=input_dict).astype( + input_df = DataFrame(data=input_dict).astype( {"col1": "category", "col2": "category", "col3": "category"} ) # create expected dataframe expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]} # explicitly cast columns as category - expected = pd.DataFrame(data=expected_dict).astype( + expected = DataFrame(data=expected_dict).astype( {"col1": "category", "col2": "category", "col3": "category"} ) @@ -1612,23 +1612,23 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d def test_replace_with_compiled_regex(self): # https://github.com/pandas-dev/pandas/issues/35680 - df = pd.DataFrame(["a", "b", "c"]) + df = DataFrame(["a", "b", "c"]) regex = re.compile("^a$") result = df.replace({regex: "z"}, regex=True) - expected = pd.DataFrame(["z", "b", "c"]) + expected = DataFrame(["z", "b", "c"]) tm.assert_frame_equal(result, expected) def test_replace_intervals(self): # https://github.com/pandas-dev/pandas/issues/35931 - df = pd.DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) + df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) result = df.replace({"a": {pd.Interval(0, 1): "x"}}) - expected = pd.DataFrame({"a": ["x", "x"]}) + expected = DataFrame({"a": ["x", "x"]}) tm.assert_frame_equal(result, expected) def test_replace_unicode(self): # GH: 16784 columns_values_map = {"positive": {"正面": 1, "中立": 1, "负面": 0}} - df1 = pd.DataFrame({"positive": np.ones(3)}) + df1 = DataFrame({"positive": np.ones(3)}) result = df1.replace(columns_values_map) - expected = pd.DataFrame({"positive": np.ones(3)}) + expected = DataFrame({"positive": np.ones(3)}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index db97a3e2a0e4f..5cf5aea8846c5 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -168,7 +168,7 @@ def test_round_mixed_type(self): def test_round_with_duplicate_columns(self): # GH#11611 - df = pd.DataFrame( + df = DataFrame( np.random.random([3, 3]), columns=["A", "B", "C"], index=["first", "second", "third"], @@ -195,7 +195,7 @@ def test_round_builtin(self): def test_round_nonunique_categorical(self): # See GH#21809 idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) - df = pd.DataFrame(np.random.rand(6, 3), columns=list("abc")) + df = DataFrame(np.random.rand(6, 3), columns=list("abc")) expected = df.round(3) expected.index = idx diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 5daecd6a475aa..2e21ce8ec2256 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -131,7 +131,7 @@ def test_shift_duplicate_columns(self): shifted = [] for columns in column_lists: - df = pd.DataFrame(data.copy(), columns=columns) + df = DataFrame(data.copy(), columns=columns) for s in range(5): df.iloc[:, s] = df.iloc[:, s].shift(s + 1) df.columns = range(5) @@ -147,8 +147,8 @@ def test_shift_duplicate_columns(self): def test_shift_axis1_multiple_blocks(self): # GH#35488 - df1 = pd.DataFrame(np.random.randint(1000, size=(5, 3))) - df2 = pd.DataFrame(np.random.randint(1000, size=(5, 2))) + df1 = DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.randint(1000, size=(5, 2))) df3 = pd.concat([df1, df2], axis=1) assert len(df3._mgr.blocks) == 2 @@ -284,13 +284,11 @@ def test_shift_dt64values_int_fill_deprecated(self): tm.assert_frame_equal(result, expected) # axis = 1 - df2 = pd.DataFrame({"A": ser, "B": ser}) + df2 = DataFrame({"A": ser, "B": ser}) df2._consolidate_inplace() with tm.assert_produces_warning(FutureWarning): result = df2.shift(1, axis=1, fill_value=0) - expected = pd.DataFrame( - {"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]} - ) + expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index a106702aff807..55450a693c2e6 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -352,7 +352,7 @@ def test_sort_index_multiindex(self, level): expected_mi = MultiIndex.from_tuples( [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC") ) - expected = pd.DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) + expected = DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) result = df.sort_index(level=level) tm.assert_frame_equal(result, expected) @@ -360,7 +360,7 @@ def test_sort_index_multiindex(self, level): expected_mi = MultiIndex.from_tuples( [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC") ) - expected = pd.DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) + expected = DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) result = df.sort_index(level=level, sort_remaining=False) tm.assert_frame_equal(result, expected) @@ -736,14 +736,14 @@ def test_sort_multi_index_key_str(self): tm.assert_frame_equal(result, expected) def test_changes_length_raises(self): - df = pd.DataFrame({"A": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 3]}) with pytest.raises(ValueError, match="change the shape"): df.sort_index(key=lambda x: x[:1]) def test_sort_index_multiindex_sparse_column(self): # GH 29735, testing that sort_index on a multiindexed frame with sparse # columns fills with 0. - expected = pd.DataFrame( + expected = DataFrame( { i: pd.array([0.0, 0.0, 0.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)) for i in range(0, 4) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 0ca232ec433e7..d59dc08b94563 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -130,7 +130,7 @@ def test_sort_values_multicolumn_uint64(self): # GH#9918 # uint64 multicolumn sort - df = pd.DataFrame( + df = DataFrame( { "a": pd.Series([18446637057563306014, 1162265347240853609]), "b": pd.Series([1, 2]), @@ -139,7 +139,7 @@ def test_sort_values_multicolumn_uint64(self): df["a"] = df["a"].astype(np.uint64) result = df.sort_values(["a", "b"]) - expected = pd.DataFrame( + expected = DataFrame( { "a": pd.Series([18446637057563306014, 1162265347240853609]), "b": pd.Series([1, 2]), @@ -355,14 +355,14 @@ def test_sort_nat(self): Timestamp(x) for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] ] - df = pd.DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] d4 = [ Timestamp(x) for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] ] - expected = pd.DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) sorted_df = df.sort_values(by=["a", "b"]) tm.assert_frame_equal(sorted_df, expected) @@ -381,7 +381,7 @@ def test_sort_values_na_position_with_categories(self): reversed_category_indices = sorted(category_indices, reverse=True) reversed_na_indices = sorted(na_indices) - df = pd.DataFrame( + df = DataFrame( { column_name: pd.Categorical( ["A", np.nan, "B", np.nan, "C"], categories=categories, ordered=True @@ -461,19 +461,19 @@ def test_sort_values_nat(self): Timestamp(x) for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] ] - df = pd.DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] d4 = [ Timestamp(x) for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] ] - expected = pd.DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) sorted_df = df.sort_values(by=["a", "b"]) tm.assert_frame_equal(sorted_df, expected) def test_sort_values_na_position_with_categories_raises(self): - df = pd.DataFrame( + df = DataFrame( { "c": pd.Categorical( ["A", np.nan, "B", np.nan, "C"], @@ -525,7 +525,7 @@ def test_sort_values_ignore_index( def test_sort_values_nat_na_position_default(self): # GH 13230 - expected = pd.DataFrame( + expected = DataFrame( { "A": [1, 2, 3, 4, 4], "date": pd.DatetimeIndex( @@ -666,7 +666,7 @@ def test_sort_values_key_empty(self, sort_by_key): df.sort_index(key=sort_by_key) def test_changes_length_raises(self): - df = pd.DataFrame({"A": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 3]}) with pytest.raises(ValueError, match="change the shape"): df.sort_values("A", key=lambda x: x[:1]) @@ -696,7 +696,7 @@ def test_sort_values_key_dict_axis(self): def test_sort_values_key_casts_to_categorical(self, ordered): # https://github.com/pandas-dev/pandas/issues/36383 categories = ["c", "b", "a"] - df = pd.DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]}) + df = DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]}) def sorter(key): if key.name == "y": @@ -706,7 +706,7 @@ def sorter(key): return key result = df.sort_values(by=["x", "y"], key=sorter) - expected = pd.DataFrame( + expected = DataFrame( {"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0]) ) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 9cf5afc09e800..fefe1392087dd 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -463,7 +463,7 @@ def test_nunique(self): @pytest.mark.parametrize("tz", [None, "UTC"]) def test_mean_mixed_datetime_numeric(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 - df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2}) + df = DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2}) with tm.assert_produces_warning(FutureWarning): result = df.mean() expected = Series([1.0], index=["A"]) @@ -474,7 +474,7 @@ def test_mean_excludes_datetimes(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 # Our long-term desired behavior is unclear, but the behavior in # 0.24.0rc1 was buggy. - df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2}) + df = DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2}) with tm.assert_produces_warning(FutureWarning): result = df.mean() @@ -498,7 +498,7 @@ def test_mean_mixed_string_decimal(self): {"A": 5, "B": None, "C": Decimal("1223.00")}, ] - df = pd.DataFrame(d) + df = DataFrame(d) result = df.mean() expected = Series([2.7, 681.6], index=["A", "C"]) @@ -766,9 +766,7 @@ def test_sum_corner(self): @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)]) def test_sum_prod_nanops(self, method, unit): idx = ["a", "b", "c"] - df = pd.DataFrame( - {"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]} - ) + df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) # The default result = getattr(df, method) expected = Series([unit, unit, unit], index=idx, dtype="float64") @@ -788,7 +786,7 @@ def test_sum_prod_nanops(self, method, unit): tm.assert_series_equal(result, expected) # min_count > 1 - df = pd.DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) result = getattr(df, method)(min_count=5) expected = Series(result, index=["A", "B"]) tm.assert_series_equal(result, expected) @@ -800,7 +798,7 @@ def test_sum_prod_nanops(self, method, unit): def test_sum_nanops_timedelta(self): # prod isn't defined on timedeltas idx = ["a", "b", "c"] - df = pd.DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]}) + df = DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]}) df2 = df.apply(pd.to_timedelta) @@ -832,7 +830,7 @@ def test_sum_bool(self, float_frame): def test_sum_mixed_datetime(self): # GH#30886 - df = pd.DataFrame( + df = DataFrame( {"A": pd.date_range("2000", periods=4), "B": [1, 2, 3, 4]} ).reindex([2, 3, 4]) result = df.sum() @@ -861,7 +859,7 @@ def test_mean_datetimelike(self): # GH#24757 check that datetimelike are excluded by default, handled # correctly with numeric_only=True - df = pd.DataFrame( + df = DataFrame( { "A": np.arange(3), "B": pd.date_range("2016-01-01", periods=3), @@ -880,7 +878,7 @@ def test_mean_datetimelike(self): tm.assert_series_equal(result, expected) def test_mean_datetimelike_numeric_only_false(self): - df = pd.DataFrame( + df = DataFrame( { "A": np.arange(3), "B": pd.date_range("2016-01-01", periods=3), @@ -902,9 +900,9 @@ def test_mean_datetimelike_numeric_only_false(self): def test_mean_extensionarray_numeric_only_true(self): # https://github.com/pandas-dev/pandas/issues/33256 arr = np.random.randint(1000, size=(10, 5)) - df = pd.DataFrame(arr, dtype="Int64") + df = DataFrame(arr, dtype="Int64") result = df.mean(numeric_only=True) - expected = pd.DataFrame(arr).mean() + expected = DataFrame(arr).mean() tm.assert_series_equal(result, expected) def test_stats_mixed_type(self, float_string_frame): @@ -1134,7 +1132,7 @@ def test_series_broadcasting(self): class TestDataFrameReductions: def test_min_max_dt64_with_NaT(self): # Both NaT and Timestamp are in DataFrame. - df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) + df = DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) res = df.min() exp = Series([pd.Timestamp("2012-05-01")], index=["foo"]) @@ -1145,7 +1143,7 @@ def test_min_max_dt64_with_NaT(self): tm.assert_series_equal(res, exp) # GH12941, only NaTs are in DataFrame. - df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]}) + df = DataFrame({"foo": [pd.NaT, pd.NaT]}) res = df.min() exp = Series([pd.NaT], index=["foo"]) @@ -1160,7 +1158,7 @@ def test_min_max_dt64_api_consistency_with_NaT(self): # returned NaT for series. These tests check that the API is consistent in # min/max calls on empty Series/DataFrames. See GH:33704 for more # information - df = pd.DataFrame(dict(x=pd.to_datetime([]))) + df = DataFrame(dict(x=pd.to_datetime([]))) expected_dt_series = Series(pd.to_datetime([])) # check axis 0 assert (df.min(axis=0).x is pd.NaT) == (expected_dt_series.min() is pd.NaT) @@ -1173,7 +1171,7 @@ def test_min_max_dt64_api_consistency_with_NaT(self): def test_min_max_dt64_api_consistency_empty_df(self): # check DataFrame/Series api consistency when calling min/max on an empty # DataFrame/Series. - df = pd.DataFrame(dict(x=[])) + df = DataFrame(dict(x=[])) expected_float_series = Series([], dtype=float) # check axis 0 assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min()) @@ -1198,7 +1196,7 @@ def test_preserve_timezone(self, initial: str, method): def test_mixed_frame_with_integer_sum(): # https://github.com/pandas-dev/pandas/issues/34520 - df = pd.DataFrame([["a", 1]], columns=list("ab")) + df = DataFrame([["a", 1]], columns=list("ab")) df = df.astype({"b": "Int64"}) result = df.sum() expected = Series(["a", 1], index=["a", "b"]) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index f5d1808f367e7..d6bc19091dcef 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -102,14 +102,14 @@ def test_column_contains_raises(self, float_frame): def test_tab_completion(self): # DataFrame whose columns are identifiers shall have them in __dir__. - df = pd.DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) + df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) for key in list("ABCD"): assert key in dir(df) assert isinstance(df.__getitem__("A"), pd.Series) # DataFrame whose first-level columns are identifiers shall have # them in __dir__. - df = pd.DataFrame( + df = DataFrame( [list("abcd"), list("efgh")], columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))), ) @@ -342,27 +342,27 @@ def test_values_mixed_dtypes(self, float_frame, float_string_frame): tm.assert_almost_equal(arr, expected) def test_to_numpy(self): - df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]}) + df = DataFrame({"A": [1, 2], "B": [3, 4.5]}) expected = np.array([[1, 3], [2, 4.5]]) result = df.to_numpy() tm.assert_numpy_array_equal(result, expected) def test_to_numpy_dtype(self): - df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]}) + df = DataFrame({"A": [1, 2], "B": [3, 4.5]}) expected = np.array([[1, 3], [2, 4]], dtype="int64") result = df.to_numpy(dtype="int64") tm.assert_numpy_array_equal(result, expected) def test_to_numpy_copy(self): arr = np.random.randn(4, 3) - df = pd.DataFrame(arr) + df = DataFrame(arr) assert df.values.base is arr assert df.to_numpy(copy=False).base is arr assert df.to_numpy(copy=True).base is not arr def test_to_numpy_mixed_dtype_to_str(self): # https://github.com/pandas-dev/pandas/issues/35455 - df = pd.DataFrame([[pd.Timestamp("2020-01-01 00:00:00"), 100.0]]) + df = DataFrame([[pd.Timestamp("2020-01-01 00:00:00"), 100.0]]) result = df.to_numpy(dtype=str) expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str) tm.assert_numpy_array_equal(result, expected) @@ -529,7 +529,7 @@ async def test_tab_complete_warning(self, ip): pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter - code = "import pandas as pd; df = pd.DataFrame()" + code = "from pandas import DataFrame; df = DataFrame()" await ip.run_code(code) # TODO: remove it when Ipython updates @@ -547,7 +547,7 @@ async def test_tab_complete_warning(self, ip): list(ip.Completer.completions("df.", 1)) def test_attrs(self): - df = pd.DataFrame({"A": [2, 3]}) + df = DataFrame({"A": [2, 3]}) assert df.attrs == {} df.attrs["version"] = 1 @@ -556,7 +556,7 @@ def test_attrs(self): @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags(self, allows_duplicate_labels): - df = pd.DataFrame({"A": [1, 2]}) + df = DataFrame({"A": [1, 2]}) result = df.set_flags(allows_duplicate_labels=allows_duplicate_labels) if allows_duplicate_labels is None: # We don't update when it's not provided diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 8db3feacfc7af..788ac56829a2b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -23,7 +23,7 @@ class TestFrameComparisons: def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks - df = pd.DataFrame(np.random.randn(6, 4), columns=list("ABCD")) + df = DataFrame(np.random.randn(6, 4), columns=list("ABCD")) msg = "The truth value of a DataFrame is ambiguous" with pytest.raises(ValueError, match=msg): df in [None] @@ -35,7 +35,7 @@ def check(df, df2): # we expect the result to match Series comparisons for # == and !=, inequalities should raise result = x == y - expected = pd.DataFrame( + expected = DataFrame( {col: x[col] == y[col] for col in x.columns}, index=x.index, columns=x.columns, @@ -43,7 +43,7 @@ def check(df, df2): tm.assert_frame_equal(result, expected) result = x != y - expected = pd.DataFrame( + expected = DataFrame( {col: x[col] != y[col] for col in x.columns}, index=x.index, columns=x.columns, @@ -71,15 +71,15 @@ def check(df, df2): # GH4968 # invalid date/int comparisons - df = pd.DataFrame(np.random.randint(10, size=(10, 1)), columns=["a"]) + df = DataFrame(np.random.randint(10, size=(10, 1)), columns=["a"]) df["dates"] = pd.date_range("20010101", periods=len(df)) df2 = df.copy() df2["dates"] = df["a"] check(df, df2) - df = pd.DataFrame(np.random.randint(10, size=(10, 2)), columns=["a", "b"]) - df2 = pd.DataFrame( + df = DataFrame(np.random.randint(10, size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame( { "a": pd.date_range("20010101", periods=len(df)), "b": pd.date_range("20100101", periods=len(df)), @@ -90,7 +90,7 @@ def check(df, df2): def test_timestamp_compare(self): # make sure we can compare Timestamps on the right AND left hand side # GH#4982 - df = pd.DataFrame( + df = DataFrame( { "dates1": pd.date_range("20010101", periods=10), "dates2": pd.date_range("20010102", periods=10), @@ -129,8 +129,8 @@ def test_mixed_comparison(self): # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, # not raise TypeError # (this appears to be fixed before GH#22163, not sure when) - df = pd.DataFrame([["1989-08-01", 1], ["1989-08-01", 2]]) - other = pd.DataFrame([["a", "b"], ["c", "d"]]) + df = DataFrame([["1989-08-01", 1], ["1989-08-01", 2]]) + other = DataFrame([["a", "b"], ["c", "d"]]) result = df == other assert not result.any().any() @@ -142,9 +142,9 @@ def test_df_boolean_comparison_error(self): # GH#4576, GH#22880 # comparing DataFrame against list/tuple with len(obj) matching # len(df.columns) is supported as of GH#22800 - df = pd.DataFrame(np.arange(6).reshape((3, 2))) + df = DataFrame(np.arange(6).reshape((3, 2))) - expected = pd.DataFrame([[False, False], [True, False], [False, False]]) + expected = DataFrame([[False, False], [True, False], [False, False]]) result = df == (2, 2) tm.assert_frame_equal(result, expected) @@ -153,15 +153,13 @@ def test_df_boolean_comparison_error(self): tm.assert_frame_equal(result, expected) def test_df_float_none_comparison(self): - df = pd.DataFrame( - np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"] - ) + df = DataFrame(np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"]) result = df.__eq__(None) assert not result.any().any() def test_df_string_comparison(self): - df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) + df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) mask_a = df.a > 1 tm.assert_frame_equal(df[mask_a], df.loc[1:1, :]) tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :]) @@ -176,8 +174,8 @@ class TestFrameFlexComparisons: def test_bool_flex_frame(self): data = np.random.randn(5, 3) other_data = np.random.randn(5, 3) - df = pd.DataFrame(data) - other = pd.DataFrame(other_data) + df = DataFrame(data) + other = DataFrame(other_data) ndim_5 = np.ones(df.shape + (1, 3)) # Unaligned @@ -265,8 +263,8 @@ def test_bool_flex_frame_complex_dtype(self): # complex arr = np.array([np.nan, 1, 6, np.nan]) arr2 = np.array([2j, np.nan, 7, None]) - df = pd.DataFrame({"a": arr}) - df2 = pd.DataFrame({"a": arr2}) + df = DataFrame({"a": arr}) + df2 = DataFrame({"a": arr2}) msg = "|".join( [ @@ -288,7 +286,7 @@ def test_bool_flex_frame_complex_dtype(self): assert rs.values.all() arr3 = np.array([2j, np.nan, None]) - df3 = pd.DataFrame({"a": arr3}) + df3 = DataFrame({"a": arr3}) with pytest.raises(TypeError, match=msg): # inequalities are not well-defined for complex numbers @@ -302,16 +300,16 @@ def test_bool_flex_frame_complex_dtype(self): def test_bool_flex_frame_object_dtype(self): # corner, dtype=object - df1 = pd.DataFrame({"col": ["foo", np.nan, "bar"]}) - df2 = pd.DataFrame({"col": ["foo", datetime.now(), "bar"]}) + df1 = DataFrame({"col": ["foo", np.nan, "bar"]}) + df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]}) result = df1.ne(df2) - exp = pd.DataFrame({"col": [False, True, False]}) + exp = DataFrame({"col": [False, True, False]}) tm.assert_frame_equal(result, exp) def test_flex_comparison_nat(self): # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, # and _definitely_ not be NaN - df = pd.DataFrame([pd.NaT]) + df = DataFrame([pd.NaT]) result = df == pd.NaT # result.iloc[0, 0] is a np.bool_ object @@ -329,7 +327,7 @@ def test_flex_comparison_nat(self): @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) def test_df_flex_cmp_constant_return_types(self, opname): # GH 15077, non-empty DataFrame - df = pd.DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) const = 2 result = getattr(df, opname)(const).dtypes.value_counts() @@ -338,7 +336,7 @@ def test_df_flex_cmp_constant_return_types(self, opname): @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) def test_df_flex_cmp_constant_return_types_empty(self, opname): # GH 15077 empty DataFrame - df = pd.DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) const = 2 empty = df.iloc[:0] @@ -347,12 +345,12 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): def test_df_flex_cmp_ea_dtype_with_ndarray_series(self): ii = pd.IntervalIndex.from_breaks([1, 2, 3]) - df = pd.DataFrame({"A": ii, "B": ii}) + df = DataFrame({"A": ii, "B": ii}) ser = Series([0, 0]) res = df.eq(ser, axis=0) - expected = pd.DataFrame({"A": [False, False], "B": [False, False]}) + expected = DataFrame({"A": [False, False], "B": [False, False]}) tm.assert_frame_equal(res, expected) ser2 = Series([1, 2], index=["A", "B"]) @@ -369,11 +367,11 @@ def test_floordiv_axis0(self): # make sure we df.floordiv(ser, axis=0) matches column-wise result arr = np.arange(3) ser = Series(arr) - df = pd.DataFrame({"A": ser, "B": ser}) + df = DataFrame({"A": ser, "B": ser}) result = df.floordiv(ser, axis=0) - expected = pd.DataFrame({col: df[col] // ser for col in df.columns}) + expected = DataFrame({col: df[col] // ser for col in df.columns}) tm.assert_frame_equal(result, expected) @@ -387,13 +385,13 @@ def test_floordiv_axis0_numexpr_path(self, opname): op = getattr(operator, opname) arr = np.arange(_MIN_ELEMENTS + 100).reshape(_MIN_ELEMENTS // 100 + 1, -1) * 100 - df = pd.DataFrame(arr) + df = DataFrame(arr) df["C"] = 1.0 ser = df[0] result = getattr(df, opname)(ser, axis=0) - expected = pd.DataFrame({col: op(df[col], ser) for col in df.columns}) + expected = DataFrame({col: op(df[col], ser) for col in df.columns}) tm.assert_frame_equal(result, expected) result2 = getattr(df, opname)(ser.values, axis=0) @@ -404,22 +402,22 @@ def test_df_add_td64_columnwise(self): dti = pd.date_range("2016-01-01", periods=10) tdi = pd.timedelta_range("1", periods=10) tser = Series(tdi) - df = pd.DataFrame({0: dti, 1: tdi}) + df = DataFrame({0: dti, 1: tdi}) result = df.add(tser, axis=0) - expected = pd.DataFrame({0: dti + tdi, 1: tdi + tdi}) + expected = DataFrame({0: dti + tdi, 1: tdi + tdi}) tm.assert_frame_equal(result, expected) def test_df_add_flex_filled_mixed_dtypes(self): # GH 19611 dti = pd.date_range("2016-01-01", periods=3) ser = Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]") - df = pd.DataFrame({"A": dti, "B": ser}) - other = pd.DataFrame({"A": ser, "B": ser}) + df = DataFrame({"A": dti, "B": ser}) + other = DataFrame({"A": ser, "B": ser}) fill = pd.Timedelta(days=1).to_timedelta64() result = df.add(other, fill_value=fill) - expected = pd.DataFrame( + expected = DataFrame( { "A": Series( ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]" @@ -531,13 +529,13 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) # broadcasting issue in GH 7325 - df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="int64") - expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="int64") + expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) - df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="float64") - expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="float64") + expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) @@ -545,8 +543,8 @@ def test_arith_flex_zero_len_raises(self): # GH 19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases ser_len0 = Series([], dtype=object) - df_len0 = pd.DataFrame(columns=["A", "B"]) - df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df_len0 = DataFrame(columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) with pytest.raises(NotImplementedError, match="fill_value"): df.add(ser_len0, fill_value="E") @@ -557,7 +555,7 @@ def test_arith_flex_zero_len_raises(self): def test_flex_add_scalar_fill_value(self): # GH#12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") - df = pd.DataFrame({"foo": dat}, index=range(6)) + df = DataFrame({"foo": dat}, index=range(6)) exp = df.fillna(0).add(2) res = df.add(2, fill_value=0) @@ -569,21 +567,21 @@ def test_td64_op_nat_casting(self): # Make sure we don't accidentally treat timedelta64(NaT) as datetime64 # when calling dispatch_to_series in DataFrame arithmetic ser = Series(["NaT", "NaT"], dtype="timedelta64[ns]") - df = pd.DataFrame([[1, 2], [3, 4]]) + df = DataFrame([[1, 2], [3, 4]]) result = df * ser - expected = pd.DataFrame({0: ser, 1: ser}) + expected = DataFrame({0: ser, 1: ser}) tm.assert_frame_equal(result, expected) def test_df_add_2d_array_rowlike_broadcasts(self): # GH#23000 arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) rowlike = arr[[1], :] # shape --> (1, ncols) assert rowlike.shape == (1, df.shape[1]) - expected = pd.DataFrame( + expected = DataFrame( [[2, 4], [4, 6], [6, 8]], columns=df.columns, index=df.index, @@ -599,12 +597,12 @@ def test_df_add_2d_array_rowlike_broadcasts(self): def test_df_add_2d_array_collike_broadcasts(self): # GH#23000 arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) collike = arr[:, [1]] # shape --> (nrows, 1) assert collike.shape == (df.shape[0], 1) - expected = pd.DataFrame( + expected = DataFrame( [[1, 2], [5, 6], [9, 10]], columns=df.columns, index=df.index, @@ -622,7 +620,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): opname = all_arithmetic_operators arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) rowlike = arr[[1], :] # shape --> (1, ncols) assert rowlike.shape == (1, df.shape[1]) @@ -633,7 +631,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): getattr(df.loc["C"], opname)(rowlike.squeeze()), ] - expected = pd.DataFrame(exvals, columns=df.columns, index=df.index) + expected = DataFrame(exvals, columns=df.columns, index=df.index) result = getattr(df, opname)(rowlike) tm.assert_frame_equal(result, expected) @@ -643,7 +641,7 @@ def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators): opname = all_arithmetic_operators arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) collike = arr[:, [1]] # shape --> (nrows, 1) assert collike.shape == (df.shape[0], 1) @@ -659,7 +657,7 @@ def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators): # DataFrame op will return all-float. So we upcast `expected` dtype = np.common_type(*[x.values for x in exvals.values()]) - expected = pd.DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype) + expected = DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype) result = getattr(df, opname)(collike) tm.assert_frame_equal(result, expected) @@ -667,7 +665,7 @@ def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators): def test_df_bool_mul_int(self): # GH 22047, GH 22163 multiplication by 1 should result in int dtype, # not object dtype - df = pd.DataFrame([[False, True], [False, False]]) + df = DataFrame([[False, True], [False, False]]) result = df * 1 # On appveyor this comes back as np.int32 instead of np.int64, @@ -681,14 +679,14 @@ def test_df_bool_mul_int(self): def test_arith_mixed(self): - left = pd.DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]}) + left = DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]}) result = left + left - expected = pd.DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]}) + expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]}) tm.assert_frame_equal(result, expected) def test_arith_getitem_commute(self): - df = pd.DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) + df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) def _test_op(df, op): result = op(df, 1) @@ -723,35 +721,35 @@ def _test_op(df, op): ) def test_arith_alignment_non_pandas_object(self, values): # GH#17901 - df = pd.DataFrame({"A": [1, 1], "B": [1, 1]}) - expected = pd.DataFrame({"A": [2, 2], "B": [3, 3]}) + df = DataFrame({"A": [1, 1], "B": [1, 1]}) + expected = DataFrame({"A": [2, 2], "B": [3, 3]}) result = df + values tm.assert_frame_equal(result, expected) def test_arith_non_pandas_object(self): - df = pd.DataFrame( + df = DataFrame( np.arange(1, 10, dtype="f8").reshape(3, 3), columns=["one", "two", "three"], index=["a", "b", "c"], ) val1 = df.xs("a").values - added = pd.DataFrame(df.values + val1, index=df.index, columns=df.columns) + added = DataFrame(df.values + val1, index=df.index, columns=df.columns) tm.assert_frame_equal(df + val1, added) - added = pd.DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns) + added = DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val1, axis=0), added) val2 = list(df["two"]) - added = pd.DataFrame(df.values + val2, index=df.index, columns=df.columns) + added = DataFrame(df.values + val2, index=df.index, columns=df.columns) tm.assert_frame_equal(df + val2, added) - added = pd.DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) + added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val2, axis="index"), added) val3 = np.random.rand(*df.shape) - added = pd.DataFrame(df.values + val3, index=df.index, columns=df.columns) + added = DataFrame(df.values + val3, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val3), added) def test_operations_with_interval_categories_index(self, all_arithmetic_operators): @@ -759,15 +757,15 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator op = all_arithmetic_operators ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0)) data = [1, 2] - df = pd.DataFrame([data], columns=ind) + df = DataFrame([data], columns=ind) num = 10 result = getattr(df, op)(num) - expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) + expected = DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) tm.assert_frame_equal(result, expected) def test_frame_with_frame_reindex(self): # GH#31623 - df = pd.DataFrame( + df = DataFrame( { "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")], "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")], @@ -778,7 +776,7 @@ def test_frame_with_frame_reindex(self): result = df - df2 - expected = pd.DataFrame( + expected = DataFrame( {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, columns=["bar", "foo"], ) @@ -788,31 +786,31 @@ def test_frame_with_frame_reindex(self): def test_frame_with_zero_len_series_corner_cases(): # GH#28600 # easy all-float case - df = pd.DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "B"]) + df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "B"]) ser = Series(dtype=np.float64) result = df + ser - expected = pd.DataFrame(df.values * np.nan, columns=df.columns) + expected = DataFrame(df.values * np.nan, columns=df.columns) tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(FutureWarning): # Automatic alignment for comparisons deprecated result = df == ser - expected = pd.DataFrame(False, index=df.index, columns=df.columns) + expected = DataFrame(False, index=df.index, columns=df.columns) tm.assert_frame_equal(result, expected) # non-float case should not raise on comparison - df2 = pd.DataFrame(df.values.view("M8[ns]"), columns=df.columns) + df2 = DataFrame(df.values.view("M8[ns]"), columns=df.columns) with tm.assert_produces_warning(FutureWarning): # Automatic alignment for comparisons deprecated result = df2 == ser - expected = pd.DataFrame(False, index=df.index, columns=df.columns) + expected = DataFrame(False, index=df.index, columns=df.columns) tm.assert_frame_equal(result, expected) def test_zero_len_frame_with_series_corner_cases(): # GH#28600 - df = pd.DataFrame(columns=["A", "B"], dtype=np.float64) + df = DataFrame(columns=["A", "B"], dtype=np.float64) ser = Series([1, 2], index=["A", "B"]) result = df + ser @@ -825,7 +823,7 @@ def test_frame_single_columns_object_sum_axis_1(): data = { "One": Series(["A", 1.2, np.nan]), } - df = pd.DataFrame(data) + df = DataFrame(data) result = df.sum(axis=1) expected = Series(["A", 1.2, 0]) tm.assert_series_equal(result, expected) @@ -840,7 +838,7 @@ def test_frame_single_columns_object_sum_axis_1(): class TestFrameArithmeticUnsorted: def test_frame_add_tz_mismatch_converts_to_utc(self): rng = pd.date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") - df = pd.DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"]) + df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"]) df_moscow = df.tz_convert("Europe/Moscow") result = df + df_moscow @@ -851,7 +849,7 @@ def test_frame_add_tz_mismatch_converts_to_utc(self): def test_align_frame(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") - ts = pd.DataFrame(np.random.randn(len(rng), 3), index=rng) + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) result = ts + ts[::2] expected = ts + ts @@ -1424,7 +1422,7 @@ def test_inplace_ops_identity2(self, op): def test_alignment_non_pandas(self): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] - df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns) + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) align = pd.core.ops.align_method_FRAME for val in [ @@ -1481,14 +1479,14 @@ def test_alignment_non_pandas(self): align(df, val, "columns") def test_no_warning(self, all_arithmetic_operators): - df = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) b = df["B"] with tm.assert_produces_warning(None): getattr(df, all_arithmetic_operators)(b) def test_dunder_methods_binary(self, all_arithmetic_operators): # GH#??? frame.__foo__ should only accept one argument - df = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) b = df["B"] with pytest.raises(TypeError, match="takes 2 positional arguments"): getattr(df, all_arithmetic_operators)(b, 0) @@ -1510,20 +1508,20 @@ def test_align_int_fill_bug(self): def test_pow_with_realignment(): # GH#32685 pow has special semantics for operating with null values - left = pd.DataFrame({"A": [0, 1, 2]}) - right = pd.DataFrame(index=[0, 1, 2]) + left = DataFrame({"A": [0, 1, 2]}) + right = DataFrame(index=[0, 1, 2]) result = left ** right - expected = pd.DataFrame({"A": [np.nan, 1.0, np.nan]}) + expected = DataFrame({"A": [np.nan, 1.0, np.nan]}) tm.assert_frame_equal(result, expected) # TODO: move to tests.arithmetic and parametrize def test_pow_nan_with_zero(): - left = pd.DataFrame({"A": [np.nan, np.nan, np.nan]}) - right = pd.DataFrame({"A": [0, 0, 0]}) + left = DataFrame({"A": [np.nan, np.nan, np.nan]}) + right = DataFrame({"A": [0, 0, 0]}) - expected = pd.DataFrame({"A": [1.0, 1.0, 1.0]}) + expected = DataFrame({"A": [1.0, 1.0, 1.0]}) result = left ** right tm.assert_frame_equal(result, expected) @@ -1534,11 +1532,11 @@ def test_pow_nan_with_zero(): def test_dataframe_series_extension_dtypes(): # https://github.com/pandas-dev/pandas/issues/34311 - df = pd.DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"]) + df = DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"]) ser = Series([1, 2, 3], index=["a", "b", "c"]) expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3) - expected = pd.DataFrame(expected, columns=df.columns, dtype="Int64") + expected = DataFrame(expected, columns=df.columns, dtype="Int64") df_ea = df.astype("Int64") result = df_ea + ser @@ -1550,7 +1548,7 @@ def test_dataframe_series_extension_dtypes(): def test_dataframe_blockwise_slicelike(): # GH#34367 arr = np.random.randint(0, 1000, (100, 10)) - df1 = pd.DataFrame(arr) + df1 = DataFrame(arr) df2 = df1.copy() df2.iloc[0, [1, 3, 7]] = np.nan @@ -1565,20 +1563,20 @@ def test_dataframe_blockwise_slicelike(): for left, right in [(df1, df2), (df2, df3), (df4, df5)]: res = left + right - expected = pd.DataFrame({i: left[i] + right[i] for i in left.columns}) + expected = DataFrame({i: left[i] + right[i] for i in left.columns}) tm.assert_frame_equal(res, expected) @pytest.mark.parametrize( "df, col_dtype", [ - (pd.DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"), - (pd.DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"), + (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"), + (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"), ], ) def test_dataframe_operation_with_non_numeric_types(df, col_dtype): # GH #22663 - expected = pd.DataFrame([[0.0, np.nan], [3.0, np.nan]], columns=list("ab")) + expected = DataFrame([[0.0, np.nan], [3.0, np.nan]], columns=list("ab")) expected = expected.astype({"b": col_dtype}) result = df + Series([-1.0], index=list("a")) tm.assert_frame_equal(result, expected) @@ -1586,17 +1584,17 @@ def test_dataframe_operation_with_non_numeric_types(df, col_dtype): def test_arith_reindex_with_duplicates(): # https://github.com/pandas-dev/pandas/issues/35194 - df1 = pd.DataFrame(data=[[0]], columns=["second"]) - df2 = pd.DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"]) + df1 = DataFrame(data=[[0]], columns=["second"]) + df2 = DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"]) result = df1 + df2 - expected = pd.DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"]) + expected = DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("to_add", [[Series([1, 1])], [Series([1, 1]), Series([1, 1])]]) def test_arith_list_of_arraylike_raise(to_add): # GH 36702. Raise when trying to add list of array-like to DataFrame - df = pd.DataFrame({"x": [1, 2], "y": [1, 2]}) + df = DataFrame({"x": [1, 2], "y": [1, 2]}) msg = f"Unable to coerce list of {type(to_add[0])} to Series/DataFrame" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 2877905ddced1..5772c0650ebe4 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -302,7 +302,7 @@ def f(dtype): def test_equals_different_blocks(self): # GH 9330 - df0 = pd.DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]}) + df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]}) df1 = df0.reset_index()[["A", "B", "C"]] # this assert verifies that the above operations have # induced a block rearrangement @@ -607,16 +607,16 @@ def test_constructor_no_pandas_array(self): # Ensure that PandasArray isn't allowed inside Series # See https://github.com/pandas-dev/pandas/issues/23995 for more. arr = Series([1, 2, 3]).array - result = pd.DataFrame({"A": arr}) - expected = pd.DataFrame({"A": [1, 2, 3]}) + result = DataFrame({"A": arr}) + expected = DataFrame({"A": [1, 2, 3]}) tm.assert_frame_equal(result, expected) assert isinstance(result._mgr.blocks[0], IntBlock) def test_add_column_with_pandas_array(self): # GH 26390 - df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)) - df2 = pd.DataFrame( + df2 = DataFrame( { "a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"], @@ -630,7 +630,7 @@ def test_add_column_with_pandas_array(self): def test_to_dict_of_blocks_item_cache(): # Calling to_dict_of_blocks should not poison item_cache - df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)) mgr = df._mgr assert len(mgr.blocks) == 3 # i.e. not consolidated @@ -648,7 +648,7 @@ def test_to_dict_of_blocks_item_cache(): def test_update_inplace_sets_valid_block_values(): # https://github.com/pandas-dev/pandas/issues/33457 - df = pd.DataFrame({"a": Series([1, 2, None], dtype="category")}) + df = DataFrame({"a": Series([1, 2, None], dtype="category")}) # inplace update of a single column df["a"].fillna(1, inplace=True) @@ -664,7 +664,7 @@ def test_nonconsolidated_item_cache_take(): # https://github.com/pandas-dev/pandas/issues/35521 # create non-consolidated dataframe with object dtype columns - df = pd.DataFrame() + df = DataFrame() df["col1"] = Series(["a"], dtype=object) df["col2"] = Series([0], dtype=object) @@ -678,6 +678,6 @@ def test_nonconsolidated_item_cache_take(): # now setting value should update actual dataframe df.at[0, "col1"] = "A" - expected = pd.DataFrame({"col1": ["A"], "col2": [0]}, dtype=object) + expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object) tm.assert_frame_equal(df, expected) assert df.at[0, "col1"] == "A" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2bc6953217cf8..acc87defb568c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -154,17 +154,17 @@ def test_constructor_dtype_list_data(self): @pytest.mark.skipif(_np_version_under1p19, reason="NumPy change.") def test_constructor_list_of_2d_raises(self): # https://github.com/pandas-dev/pandas/issues/32289 - a = pd.DataFrame() + a = DataFrame() b = np.empty((0, 0)) with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): - pd.DataFrame([a]) + DataFrame([a]) with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): - pd.DataFrame([b]) + DataFrame([b]) - a = pd.DataFrame({"A": [1, 2]}) + a = DataFrame({"A": [1, 2]}) with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"): - pd.DataFrame([a, a]) + DataFrame([a, a]) def test_constructor_mixed_dtypes(self): def _make_mixed_dtypes_df(typ, ad=None): @@ -1101,10 +1101,10 @@ def test_constructor_list_of_lists(self): def test_constructor_list_like_data_nested_list_column(self): # GH 32173 arrays = [list("abcd"), list("cdef")] - result = pd.DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays) + result = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays) mi = MultiIndex.from_arrays(arrays) - expected = pd.DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=mi) + expected = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=mi) tm.assert_frame_equal(result, expected) @@ -1655,10 +1655,10 @@ def test_constructor_index_names(self, name_in1, name_in2, name_in3, name_out): series = { c: Series([0, 1, 2], index=i) for i, c in zip(indices, ["x", "y", "z"]) } - result = pd.DataFrame(series) + result = DataFrame(series) exp_ind = pd.Index(["a", "b", "c", "d", "e"], name=name_out) - expected = pd.DataFrame( + expected = DataFrame( { "x": [0, 1, 2, np.nan, np.nan], "y": [np.nan, 0, 1, 2, np.nan], @@ -2342,7 +2342,7 @@ def test_from_records_empty_with_nonempty_fields_gh3682(self): def test_check_dtype_empty_numeric_column(self, dtype): # GH24386: Ensure dtypes are set correctly for an empty DataFrame. # Empty DataFrame is generated via dictionary data with non-overlapping columns. - data = pd.DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) assert data.b.dtype == dtype @@ -2352,7 +2352,7 @@ def test_check_dtype_empty_numeric_column(self, dtype): def test_check_dtype_empty_string_column(self, dtype): # GH24386: Ensure dtypes are set correctly for an empty DataFrame. # Empty DataFrame is generated via dictionary data with non-overlapping columns. - data = pd.DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) assert data.b.dtype.name == "object" @@ -2668,7 +2668,7 @@ def test_from_datetime_subclass(self): class DatetimeSubclass(datetime): pass - data = pd.DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]}) + data = DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]}) assert data.datetime.dtype == "datetime64[ns]" def test_with_mismatched_index_length_raises(self): @@ -2823,4 +2823,4 @@ def test_construction_from_set_raises(self): # https://github.com/pandas-dev/pandas/issues/32582 msg = "Set type is unordered" with pytest.raises(TypeError, match=msg): - pd.DataFrame({"a": {1, 2, 3}}) + DataFrame({"a": {1, 2, 3}}) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 96e56c329475c..d44c62e1defc7 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -35,21 +35,21 @@ def test_concat_empty_dataframe_dtypes(self): assert result["c"].dtype == np.float64 def test_empty_frame_dtypes(self): - empty_df = pd.DataFrame() + empty_df = DataFrame() tm.assert_series_equal(empty_df.dtypes, Series(dtype=object)) - nocols_df = pd.DataFrame(index=[1, 2, 3]) + nocols_df = DataFrame(index=[1, 2, 3]) tm.assert_series_equal(nocols_df.dtypes, Series(dtype=object)) - norows_df = pd.DataFrame(columns=list("abc")) + norows_df = DataFrame(columns=list("abc")) tm.assert_series_equal(norows_df.dtypes, Series(object, index=list("abc"))) - norows_int_df = pd.DataFrame(columns=list("abc")).astype(np.int32) + norows_int_df = DataFrame(columns=list("abc")).astype(np.int32) tm.assert_series_equal( norows_int_df.dtypes, Series(np.dtype("int32"), index=list("abc")) ) - df = pd.DataFrame(dict([("a", 1), ("b", True), ("c", 1.0)]), index=[1, 2, 3]) + df = DataFrame(dict([("a", 1), ("b", True), ("c", 1.0)]), index=[1, 2, 3]) ex_dtypes = Series(dict([("a", np.int64), ("b", np.bool_), ("c", np.float64)])) tm.assert_series_equal(df.dtypes, ex_dtypes) @@ -80,7 +80,7 @@ def test_datetime_with_tz_dtypes(self): def test_dtypes_are_correct_after_column_slice(self): # GH6525 - df = pd.DataFrame(index=range(5), columns=list("abc"), dtype=np.float_) + df = DataFrame(index=range(5), columns=list("abc"), dtype=np.float_) tm.assert_series_equal( df.dtypes, Series(dict([("a", np.float_), ("b", np.float_), ("c", np.float_)])), @@ -107,7 +107,7 @@ def test_dtypes_gh8722(self, float_string_frame): def test_singlerow_slice_categoricaldtype_gives_series(self): # GH29521 - df = pd.DataFrame({"x": pd.Categorical("a b c d e".split())}) + df = DataFrame({"x": pd.Categorical("a b c d e".split())}) result = df.iloc[0] raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"]) expected = Series(raw_cat, index=["x"], name=0, dtype="category") @@ -227,7 +227,7 @@ def test_is_homogeneous_type(self, data, expected): assert data._is_homogeneous_type is expected def test_asarray_homogenous(self): - df = pd.DataFrame({"A": pd.Categorical([1, 2]), "B": pd.Categorical([1, 2])}) + df = DataFrame({"A": pd.Categorical([1, 2]), "B": pd.Categorical([1, 2])}) result = np.asarray(df) # may change from object in the future expected = np.array([[1, 1], [2, 2]], dtype="object") @@ -237,12 +237,12 @@ def test_str_to_small_float_conversion_type(self): # GH 20388 np.random.seed(13) col_data = [str(np.random.random() * 1e-12) for _ in range(5)] - result = pd.DataFrame(col_data, columns=["A"]) - expected = pd.DataFrame(col_data, columns=["A"], dtype=object) + result = DataFrame(col_data, columns=["A"]) + expected = DataFrame(col_data, columns=["A"], dtype=object) tm.assert_frame_equal(result, expected) # change the dtype of the elements from object to float one by one result.loc[result.index, "A"] = [float(x) for x in col_data] - expected = pd.DataFrame(col_data, columns=["A"], dtype=float) + expected = DataFrame(col_data, columns=["A"], dtype=float) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -251,14 +251,14 @@ def test_str_to_small_float_conversion_type(self): def test_convert_dtypes(self, convert_integer, expected): # Specific types are tested in tests/series/test_dtypes.py # Just check that it works for DataFrame here - df = pd.DataFrame( + df = DataFrame( { "a": Series([1, 2, 3], dtype=np.dtype("int32")), "b": Series(["x", "y", "z"], dtype=np.dtype("O")), } ) result = df.convert_dtypes(True, True, convert_integer, False) - expected = pd.DataFrame( + expected = DataFrame( { "a": Series([1, 2, 3], dtype=expected), "b": Series(["x", "y", "z"], dtype="string"), diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 07cd307c8cc54..2438c743f3b8a 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -235,7 +235,7 @@ def test_join_str_datetime(self): def test_join_multiindex_leftright(self): # GH 10741 - df1 = pd.DataFrame( + df1 = DataFrame( [ ["a", "x", 0.471780], ["a", "y", 0.774908], @@ -250,11 +250,11 @@ def test_join_multiindex_leftright(self): columns=["first", "second", "value1"], ).set_index(["first", "second"]) - df2 = pd.DataFrame( - [["a", 10], ["b", 20]], columns=["first", "value2"] - ).set_index(["first"]) + df2 = DataFrame([["a", 10], ["b", 20]], columns=["first", "value2"]).set_index( + ["first"] + ) - exp = pd.DataFrame( + exp = DataFrame( [ [0.471780, 10], [0.774908, 10], @@ -277,7 +277,7 @@ def test_join_multiindex_leftright(self): exp_idx = pd.MultiIndex.from_product( [["a", "b"], ["x", "y", "z"]], names=["first", "second"] ) - exp = pd.DataFrame( + exp = DataFrame( [ [0.471780, 10], [0.774908, 10], diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 024403189409c..f3f2bbe1d160e 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -135,7 +135,7 @@ def test_dataframe_sub_numexpr_path(self): def test_query_non_str(self): # GH 11485 - df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]}) + df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]}) msg = "expr must be a string to be evaluated" with pytest.raises(ValueError, match=msg): @@ -146,7 +146,7 @@ def test_query_non_str(self): def test_query_empty_string(self): # GH 13139 - df = pd.DataFrame({"A": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 3]}) msg = "expr cannot be an empty string" with pytest.raises(ValueError, match=msg): @@ -162,9 +162,9 @@ def test_eval_resolvers_as_list(self): def test_eval_object_dtype_binop(self): # GH#24883 - df = pd.DataFrame({"a1": ["Y", "N"]}) + df = DataFrame({"a1": ["Y", "N"]}) res = df.eval("c = ((a1 == 'Y') & True)") - expected = pd.DataFrame({"a1": ["Y", "N"], "c": [True, False]}) + expected = DataFrame({"a1": ["Y", "N"], "c": [True, False]}) tm.assert_frame_equal(res, expected) @@ -716,12 +716,12 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture): df_index = pd.date_range( start="2019-01-01", freq="1d", periods=10, tz=tz, name="time" ) - expected = pd.DataFrame(index=df_index) - df = pd.DataFrame(index=df_index) + expected = DataFrame(index=df_index) + df = DataFrame(index=df_index) result = df.query('"2018-01-03 00:00:00+00" < time') tm.assert_frame_equal(result, expected) - expected = pd.DataFrame(df_index) + expected = DataFrame(df_index) result = df.reset_index().query('"2018-01-03 00:00:00+00" < time') tm.assert_frame_equal(result, expected) @@ -1045,7 +1045,7 @@ def test_query_single_element_booleans(self, parser, engine): def test_query_string_scalar_variable(self, parser, engine): skip_if_no_pandas_parser(parser) - df = pd.DataFrame( + df = DataFrame( { "Symbol": ["BUD US", "BUD US", "IBM US", "IBM US"], "Price": [109.70, 109.72, 183.30, 183.35], diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 67c53a56eebe9..83a3b65d4b601 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -142,7 +142,7 @@ def test_stack_mixed_level(self): def test_unstack_not_consolidated(self): # Gh#34708 - df = pd.DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) + df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) df2 = df[["x"]] df2["y"] = df["y"] assert len(df2._mgr.blocks) == 2 @@ -352,10 +352,10 @@ def test_unstack_tuplename_in_multiindex(self): idx = pd.MultiIndex.from_product( [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] ) - df = pd.DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx) + df = DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx) result = df.unstack(("A", "a")) - expected = pd.DataFrame( + expected = DataFrame( [[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]], columns=pd.MultiIndex.from_tuples( [ @@ -413,17 +413,17 @@ def test_unstack_mixed_type_name_in_multiindex( idx = pd.MultiIndex.from_product( [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] ) - df = pd.DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx) + df = DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx) result = df.unstack(unstack_idx) - expected = pd.DataFrame( + expected = DataFrame( expected_values, columns=expected_columns, index=expected_index ) tm.assert_frame_equal(result, expected) def test_unstack_preserve_dtypes(self): # Checks fix for #11847 - df = pd.DataFrame( + df = DataFrame( dict( state=["IL", "MI", "NC"], index=["a", "b", "c"], @@ -595,7 +595,7 @@ def test_unstack_level_binding(self): names=["first", "second"], ) - expected = pd.DataFrame( + expected = DataFrame( np.array( [[np.nan, 0], [0, np.nan], [np.nan, 0], [0, np.nan]], dtype=np.float64 ), @@ -717,11 +717,11 @@ def test_unstack_non_unique_index_names(self): def test_unstack_unused_levels(self): # GH 17845: unused codes in index make unstack() cast int to float idx = pd.MultiIndex.from_product([["a"], ["A", "B", "C", "D"]])[:-1] - df = pd.DataFrame([[1, 0]] * 3, index=idx) + df = DataFrame([[1, 0]] * 3, index=idx) result = df.unstack() exp_col = pd.MultiIndex.from_product([[0, 1], ["A", "B", "C"]]) - expected = pd.DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) + expected = DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) tm.assert_frame_equal(result, expected) assert (result.columns.levels[1] == idx.levels[1]).all() @@ -730,9 +730,9 @@ def test_unstack_unused_levels(self): codes = [[0, 0, 1, 1], [0, 2, 0, 2]] idx = pd.MultiIndex(levels, codes) block = np.arange(4).reshape(2, 2) - df = pd.DataFrame(np.concatenate([block, block + 4]), index=idx) + df = DataFrame(np.concatenate([block, block + 4]), index=idx) result = df.unstack() - expected = pd.DataFrame( + expected = DataFrame( np.concatenate([block * 2, block * 2 + 1], axis=1), columns=idx ) tm.assert_frame_equal(result, expected) @@ -743,7 +743,7 @@ def test_unstack_unused_levels(self): codes = [[0, -1, 1, 1], [0, 2, -1, 2]] idx = pd.MultiIndex(levels, codes) data = np.arange(8) - df = pd.DataFrame(data.reshape(4, 2), index=idx) + df = DataFrame(data.reshape(4, 2), index=idx) cases = ( (0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]), @@ -754,17 +754,13 @@ def test_unstack_unused_levels(self): exp_data = np.zeros(18) * np.nan exp_data[idces] = data cols = pd.MultiIndex.from_product([[0, 1], col_level]) - expected = pd.DataFrame( - exp_data.reshape(3, 6), index=idx_level, columns=cols - ) + expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("cols", [["A", "C"], slice(None)]) def test_unstack_unused_level(self, cols): # GH 18562 : unused codes on the unstacked level - df = pd.DataFrame( - [[2010, "a", "I"], [2011, "b", "II"]], columns=["A", "B", "C"] - ) + df = DataFrame([[2010, "a", "I"], [2011, "b", "II"]], columns=["A", "B", "C"]) ind = df.set_index(["A", "B", "C"], drop=False) selection = ind.loc[(slice(None), slice(None), "I"), cols] @@ -780,7 +776,7 @@ def test_unstack_unused_level(self, cols): def test_unstack_long_index(self): # PH 32624: Error when using a lot of indices to unstack. # The error occurred only, if a lot of indices are used. - df = pd.DataFrame( + df = DataFrame( [[1]], columns=pd.MultiIndex.from_tuples([[0]], names=["c1"]), index=pd.MultiIndex.from_tuples( @@ -789,7 +785,7 @@ def test_unstack_long_index(self): ), ) result = df.unstack(["i2", "i3", "i4", "i5", "i6", "i7"]) - expected = pd.DataFrame( + expected = DataFrame( [[1]], columns=pd.MultiIndex.from_tuples( [[0, 0, 1, 0, 0, 0, 1]], @@ -801,7 +797,7 @@ def test_unstack_long_index(self): def test_unstack_multi_level_cols(self): # PH 24729: Unstack a df with multi level columns - df = pd.DataFrame( + df = DataFrame( [[0.0, 0.0], [0.0, 0.0]], columns=pd.MultiIndex.from_tuples( [["B", "C"], ["B", "D"]], names=["c1", "c2"] @@ -814,7 +810,7 @@ def test_unstack_multi_level_cols(self): def test_unstack_multi_level_rows_and_cols(self): # PH 28306: Unstack df with multi level cols and rows - df = pd.DataFrame( + df = DataFrame( [[1, 2], [3, 4], [-1, -2], [-3, -4]], columns=pd.MultiIndex.from_tuples([["a", "b", "c"], ["d", "e", "f"]]), index=pd.MultiIndex.from_tuples( @@ -918,7 +914,7 @@ def verify(df): verify(udf[col]) # GH7403 - df = pd.DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) + df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) df.iloc[3, 1] = np.NaN left = df.set_index(["A", "B"]).unstack(0) @@ -947,9 +943,7 @@ def verify(df): right = DataFrame(vals, columns=cols, index=idx) tm.assert_frame_equal(left, right) - df = pd.DataFrame( - {"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)} - ) + df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) df.iloc[3, 1] = np.NaN left = df.set_index(["A", "B"]).unstack(0) @@ -962,7 +956,7 @@ def verify(df): tm.assert_frame_equal(left, right) # GH7401 - df = pd.DataFrame( + df = DataFrame( { "A": list("aaaaabbbbb"), "B": (date_range("2012-01-01", periods=5).tolist() * 2), @@ -1141,7 +1135,7 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels): def test_stack_preserve_categorical_dtype_values(self): # GH-23077 cat = pd.Categorical(["a", "a", "b", "c"]) - df = pd.DataFrame({"A": cat, "B": cat}) + df = DataFrame({"A": cat, "B": cat}) result = df.stack() index = pd.MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]]) expected = Series( @@ -1159,10 +1153,10 @@ def test_stack_preserve_categorical_dtype_values(self): ) def test_stack_multi_columns_non_unique_index(self, index, columns): # GH-28301 - df = pd.DataFrame(index=index, columns=columns).fillna(1) + df = DataFrame(index=index, columns=columns).fillna(1) stacked = df.stack() new_index = pd.MultiIndex.from_tuples(stacked.index.to_numpy()) - expected = pd.DataFrame( + expected = DataFrame( stacked.to_numpy(), index=new_index, columns=stacked.columns ) tm.assert_frame_equal(stacked, expected) @@ -1175,7 +1169,7 @@ def test_unstack_mixed_extension_types(self, level): index = pd.MultiIndex.from_tuples( [("A", 0), ("A", 1), ("B", 1)], names=["a", "b"] ) - df = pd.DataFrame( + df = DataFrame( { "A": pd.core.arrays.integer_array([0, 1, None]), "B": pd.Categorical(["a", "a", "b"]), @@ -1196,10 +1190,10 @@ def test_unstack_mixed_extension_types(self, level): def test_unstack_swaplevel_sortlevel(self, level): # GH 20994 mi = pd.MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"]) - df = pd.DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) + df = DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) df.columns.name = "foo" - expected = pd.DataFrame( + expected = DataFrame( [[3, 1, 2, 0]], columns=pd.MultiIndex.from_tuples( [("c", "A"), ("c", "B"), ("d", "A"), ("d", "B")], names=["baz", "foo"] @@ -1220,14 +1214,14 @@ def test_unstack_fill_frame_object(): # By default missing values will be NaN result = data.unstack() - expected = pd.DataFrame( + expected = DataFrame( {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz") ) tm.assert_frame_equal(result, expected) # Fill with any value replaces missing values as expected result = data.unstack(fill_value="d") - expected = pd.DataFrame( + expected = DataFrame( {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz") ) tm.assert_frame_equal(result, expected) @@ -1235,7 +1229,7 @@ def test_unstack_fill_frame_object(): def test_unstack_timezone_aware_values(): # GH 18338 - df = pd.DataFrame( + df = DataFrame( { "timestamp": [pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC")], "a": ["a"], @@ -1245,7 +1239,7 @@ def test_unstack_timezone_aware_values(): columns=["timestamp", "a", "b", "c"], ) result = df.set_index(["a", "b"]).unstack() - expected = pd.DataFrame( + expected = DataFrame( [[pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC"), "c"]], index=pd.Index(["a"], name="a"), columns=pd.MultiIndex( @@ -1262,7 +1256,7 @@ def test_stack_timezone_aware_values(): ts = pd.date_range( freq="D", start="20180101", end="20180103", tz="America/New_York" ) - df = pd.DataFrame({"A": ts}, index=["a", "b", "c"]) + df = DataFrame({"A": ts}, index=["a", "b", "c"]) result = df.stack() expected = Series( ts, @@ -1307,11 +1301,11 @@ def test_unstacking_multi_index_df(): def test_stack_positional_level_duplicate_column_names(): # https://github.com/pandas-dev/pandas/issues/36353 columns = pd.MultiIndex.from_product([("x", "y"), ("y", "z")], names=["a", "a"]) - df = pd.DataFrame([[1, 1, 1, 1]], columns=columns) + df = DataFrame([[1, 1, 1, 1]], columns=columns) result = df.stack(0) new_columns = pd.Index(["y", "z"], name="a") new_index = pd.MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) - expected = pd.DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) + expected = DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index e4e22953397ca..f3667c4dd9d9d 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -45,14 +45,10 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): data_ns = np.array([1, "nat"], dtype="datetime64[ns]") result = pd.Series(data_ns).to_frame() result["new"] = data_ns - expected = pd.DataFrame( - {0: [1, None], "new": [1, None]}, dtype="datetime64[ns]" - ) + expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") tm.assert_frame_equal(result, expected) # OutOfBoundsDatetime error shouldn't occur data_s = np.array([1, "nat"], dtype="datetime64[s]") result["new"] = data_s - expected = pd.DataFrame( - {0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]" - ) + expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index b085704e8b06f..38032ff717afc 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -133,7 +133,7 @@ def test_to_csv_from_csv4(self): with tm.ensure_clean("__tmp_to_csv_from_csv4__") as path: # GH 10833 (TimedeltaIndex formatting) dt = pd.Timedelta(seconds=1) - df = pd.DataFrame( + df = DataFrame( {"dt_data": [i * dt for i in range(3)]}, index=pd.Index([i * dt for i in range(3)], name="dt_index"), ) @@ -1257,7 +1257,7 @@ def test_to_csv_quoting(self): # xref gh-7791: make sure the quoting parameter is passed through # with multi-indexes - df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) df = df.set_index(["a", "b"]) expected_rows = ['"a","b","c"', '"1","3","5"', '"2","4","6"'] @@ -1270,7 +1270,7 @@ def test_period_index_date_overflow(self): dates = ["1990-01-01", "2000-01-01", "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") - df = pd.DataFrame([4, 5, 6], index=index) + df = DataFrame([4, 5, 6], index=index) result = df.to_csv() expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"] @@ -1288,7 +1288,7 @@ def test_period_index_date_overflow(self): dates = ["1990-01-01", pd.NaT, "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") - df = pd.DataFrame([4, 5, 6], index=index) + df = DataFrame([4, 5, 6], index=index) result = df.to_csv() expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"] @@ -1298,7 +1298,7 @@ def test_period_index_date_overflow(self): def test_multi_index_header(self): # see gh-5539 columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) - df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) df.columns = columns header = ["a", "b", "c", "d"] @@ -1311,7 +1311,7 @@ def test_multi_index_header(self): def test_to_csv_single_level_multi_index(self): # see gh-26303 index = pd.Index([(1,), (2,), (3,)]) - df = pd.DataFrame([[1, 2, 3]], columns=index) + df = DataFrame([[1, 2, 3]], columns=index) df = df.reindex(columns=[(1,), (3,)]) expected = ",1,3\n0,1,3\n" result = df.to_csv(line_terminator="\n") @@ -1319,7 +1319,7 @@ def test_to_csv_single_level_multi_index(self): def test_gz_lineend(self): # GH 25311 - df = pd.DataFrame({"a": [1, 2]}) + df = DataFrame({"a": [1, 2]}) expected_rows = ["a", "1", "2"] expected = tm.convert_rows_list_to_csv_str(expected_rows) with tm.ensure_clean("__test_gz_lineend.csv.gz") as path: diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index ad0d1face53cf..9ecc0e6194912 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -24,7 +24,7 @@ def test_rename_mi(self): @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) def test_set_axis_name(self, func): - df = pd.DataFrame([[1, 2], [3, 4]]) + df = DataFrame([[1, 2], [3, 4]]) result = methodcaller(func, "foo")(df) assert df.index.name is None diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index fe1c476ed2205..bc666ade9f13d 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -407,7 +407,7 @@ def test_sample(self): def test_sample_upsampling_without_replacement(self): # GH27451 - df = pd.DataFrame({"A": list("abc")}) + df = DataFrame({"A": list("abc")}) msg = ( "Replace has to be set to `True` when " "upsampling the population `frac` > 1." @@ -418,7 +418,7 @@ def test_sample_upsampling_without_replacement(self): def test_sample_is_copy(self): # GH-27357, GH-30784: ensure the result of sample is an actual copy and # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings - df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) df2 = df.sample(3) with tm.assert_produces_warning(None): @@ -542,7 +542,7 @@ def test_sample(sel): easy_weight_list = [0] * 10 easy_weight_list[5] = 1 - df = pd.DataFrame( + df = DataFrame( { "col1": range(10, 20), "col2": range(20, 30), @@ -578,7 +578,7 @@ def test_sample(sel): ### # Test axis argument - df = pd.DataFrame({"col1": range(10), "col2": ["a"] * 10}) + df = DataFrame({"col1": range(10), "col2": ["a"] * 10}) second_column_weight = [0, 1] tm.assert_frame_equal( df.sample(n=1, axis=1, weights=second_column_weight), df[["col2"]] @@ -615,7 +615,7 @@ def test_sample(sel): easy_weight_list = [0] * 3 easy_weight_list[2] = 1 - df = pd.DataFrame( + df = DataFrame( {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} ) sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) @@ -663,7 +663,7 @@ def test_sample(sel): ) def test_sample_random_state(self, func_str, arg): # GH32503 - df = pd.DataFrame({"col1": range(10, 20), "col2": range(20, 30)}) + df = DataFrame({"col1": range(10, 20), "col2": range(20, 30)}) result = df.sample(n=3, random_state=eval(func_str)(arg)) expected = df.sample(n=3, random_state=com.random_state(eval(func_str)(arg))) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c7a52dd45fadc..a1cbf38d8eae6 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -20,7 +20,7 @@ def test_groupby_agg_no_extra_calls(): # GH#31760 - df = pd.DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]}) + df = DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]}) gb = df.groupby("key")["value"] def dummy_func(x): @@ -115,13 +115,13 @@ def test_groupby_aggregation_multi_level_column(): [True, True, np.nan, False], [True, True, np.nan, False], ] - df = pd.DataFrame( + df = DataFrame( data=lst, columns=pd.MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), ) result = df.groupby(level=1, axis=1).sum() - expected = pd.DataFrame({0: [2.0, 1, 1, 1], 1: [1, 0, 1, 1]}) + expected = DataFrame({0: [2.0, 1, 1, 1], 1: [1, 0, 1, 1]}) tm.assert_frame_equal(result, expected) @@ -253,7 +253,7 @@ def test_agg_multiple_functions_maintain_order(df): def test_agg_multiple_functions_same_name(): # GH 30880 - df = pd.DataFrame( + df = DataFrame( np.random.randn(1000, 3), index=pd.date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], @@ -266,7 +266,7 @@ def test_agg_multiple_functions_same_name(): expected_values = np.array( [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]] ).T - expected = pd.DataFrame( + expected = DataFrame( expected_values, columns=expected_columns, index=expected_index ) tm.assert_frame_equal(result, expected) @@ -275,7 +275,7 @@ def test_agg_multiple_functions_same_name(): def test_agg_multiple_functions_same_name_with_ohlc_present(): # GH 30880 # ohlc expands dimensions, so different test to the above is required. - df = pd.DataFrame( + df = DataFrame( np.random.randn(1000, 3), index=pd.date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], @@ -298,7 +298,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]] ).T expected_values = np.hstack([df.resample("3T").A.ohlc(), non_ohlc_expected_values]) - expected = pd.DataFrame( + expected = DataFrame( expected_values, columns=expected_columns, index=expected_index ) # PerformanceWarning is thrown by `assert col in right` in assert_frame_equal @@ -382,7 +382,7 @@ def test_multi_function_flexible_mix(df): def test_groupby_agg_coercing_bools(): # issue 14873 - dat = pd.DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) + dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) gp = dat.groupby("a") index = Index([1, 2], name="a") @@ -410,7 +410,7 @@ def test_groupby_agg_coercing_bools(): def test_bool_agg_dtype(op): # GH 7001 # Bool sum aggregations result in int - df = pd.DataFrame({"a": [1, 1], "b": [False, True]}) + df = DataFrame({"a": [1, 1], "b": [False, True]}) s = df.set_index("a")["b"] result = op(df.groupby("a"))["b"].dtype @@ -422,7 +422,7 @@ def test_bool_agg_dtype(op): def test_order_aggregate_multiple_funcs(): # GH 25692 - df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) + df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) result = res.columns.levels[1] @@ -436,7 +436,7 @@ def test_order_aggregate_multiple_funcs(): @pytest.mark.parametrize("how", ["first", "last", "min", "max", "mean", "median"]) def test_uint64_type_handling(dtype, how): # GH 26310 - df = pd.DataFrame({"x": 6903052872240755750, "y": [1, 2]}) + df = DataFrame({"x": 6903052872240755750, "y": [1, 2]}) expected = df.groupby("y").agg({"x": how}) df.x = df.x.astype(dtype) result = df.groupby("y").agg({"x": how}) @@ -447,7 +447,7 @@ def test_uint64_type_handling(dtype, how): def test_func_duplicates_raises(): # GH28426 msg = "Function names" - df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) with pytest.raises(SpecificationError, match=msg): df.groupby("A").agg(["min", "min"]) @@ -471,7 +471,7 @@ def test_agg_index_has_complex_internals(index): def test_agg_split_block(): # https://github.com/pandas-dev/pandas/issues/31522 - df = pd.DataFrame( + df = DataFrame( { "key1": ["a", "a", "b", "b", "a"], "key2": ["one", "two", "one", "two", "one"], @@ -479,7 +479,7 @@ def test_agg_split_block(): } ) result = df.groupby("key1").min() - expected = pd.DataFrame( + expected = DataFrame( {"key2": ["one", "one"], "key3": ["six", "six"]}, index=pd.Index(["a", "b"], name="key1"), ) @@ -488,7 +488,7 @@ def test_agg_split_block(): def test_agg_split_object_part_datetime(): # https://github.com/pandas-dev/pandas/pull/31616 - df = pd.DataFrame( + df = DataFrame( { "A": pd.date_range("2000", periods=4), "B": ["a", "b", "c", "d"], @@ -499,7 +499,7 @@ def test_agg_split_object_part_datetime(): } ).astype(object) result = df.groupby([0, 0, 0, 0]).min() - expected = pd.DataFrame( + expected = DataFrame( { "A": [pd.Timestamp("2000")], "B": ["a"], @@ -517,7 +517,7 @@ def test_series_named_agg(self): df = Series([1, 2, 3, 4]) gr = df.groupby([0, 0, 1, 1]) result = gr.agg(a="sum", b="min") - expected = pd.DataFrame( + expected = DataFrame( {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=[0, 1] ) tm.assert_frame_equal(result, expected) @@ -533,20 +533,20 @@ def test_no_args_raises(self): # but we do allow this result = gr.agg([]) - expected = pd.DataFrame() + expected = DataFrame() tm.assert_frame_equal(result, expected) def test_series_named_agg_duplicates_no_raises(self): # GH28426 gr = Series([1, 2, 3]).groupby([0, 0, 1]) grouped = gr.agg(a="sum", b="sum") - expected = pd.DataFrame({"a": [3, 3], "b": [3, 3]}) + expected = DataFrame({"a": [3, 3], "b": [3, 3]}) tm.assert_frame_equal(expected, grouped) def test_mangled(self): gr = Series([1, 2, 3]).groupby([0, 0, 1]) result = gr.agg(a=lambda x: 0, b=lambda x: 1) - expected = pd.DataFrame({"a": [0, 0], "b": [1, 1]}) + expected = DataFrame({"a": [0, 0], "b": [1, 1]}) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -567,11 +567,11 @@ def test_named_agg_nametuple(self, inp): class TestNamedAggregationDataFrame: def test_agg_relabel(self): - df = pd.DataFrame( + df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) result = df.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max")) - expected = pd.DataFrame( + expected = DataFrame( {"a_max": [1, 3], "b_max": [6, 8]}, index=pd.Index(["a", "b"], name="group"), columns=["a_max", "b_max"], @@ -588,7 +588,7 @@ def test_agg_relabel(self): b_max=("B", "max"), a_98=("A", p98), ) - expected = pd.DataFrame( + expected = DataFrame( { "b_min": [5, 7], "a_min": [0, 2], @@ -603,12 +603,12 @@ def test_agg_relabel(self): tm.assert_frame_equal(result, expected) def test_agg_relabel_non_identifier(self): - df = pd.DataFrame( + df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) result = df.groupby("group").agg(**{"my col": ("A", "max")}) - expected = pd.DataFrame( + expected = DataFrame( {"my col": [1, 3]}, index=pd.Index(["a", "b"], name="group") ) tm.assert_frame_equal(result, expected) @@ -616,10 +616,10 @@ def test_agg_relabel_non_identifier(self): def test_duplicate_no_raises(self): # GH 28426, if use same input function on same column, # no error should raise - df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) grouped = df.groupby("A").agg(a=("B", "min"), b=("B", "min")) - expected = pd.DataFrame( + expected = DataFrame( {"a": [1, 3], "b": [1, 3]}, index=pd.Index([0, 1], name="A") ) tm.assert_frame_equal(grouped, expected) @@ -629,34 +629,32 @@ def test_duplicate_no_raises(self): quant50.__name__ = "quant50" quant70.__name__ = "quant70" - test = pd.DataFrame( - {"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]} - ) + test = DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}) grouped = test.groupby("col1").agg( quantile_50=("col2", quant50), quantile_70=("col2", quant70) ) - expected = pd.DataFrame( + expected = DataFrame( {"quantile_50": [1.5, 4.0], "quantile_70": [1.7, 4.4]}, index=pd.Index(["a", "b"], name="col1"), ) tm.assert_frame_equal(grouped, expected) def test_agg_relabel_with_level(self): - df = pd.DataFrame( + df = DataFrame( {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}, index=pd.MultiIndex.from_product([["A", "B"], ["a", "b"]]), ) result = df.groupby(level=0).agg( aa=("A", "max"), bb=("A", "min"), cc=("B", "mean") ) - expected = pd.DataFrame( + expected = DataFrame( {"aa": [0, 1], "bb": [0, 1], "cc": [1.5, 3.5]}, index=["A", "B"] ) tm.assert_frame_equal(result, expected) def test_agg_relabel_other_raises(self): - df = pd.DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]}) + df = DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]}) grouped = df.groupby("A") match = "Must provide" with pytest.raises(TypeError, match=match): @@ -669,12 +667,12 @@ def test_agg_relabel_other_raises(self): grouped.agg(a=("B", "max"), b=(1, 2, 3)) def test_missing_raises(self): - df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + df = DataFrame({"A": [0, 1], "B": [1, 2]}) with pytest.raises(KeyError, match="Column 'C' does not exist"): df.groupby("A").agg(c=("C", "sum")) def test_agg_namedtuple(self): - df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + df = DataFrame({"A": [0, 1], "B": [1, 2]}) result = df.groupby("A").agg( b=pd.NamedAgg("B", "sum"), c=pd.NamedAgg(column="B", aggfunc="count") ) @@ -682,9 +680,9 @@ def test_agg_namedtuple(self): tm.assert_frame_equal(result, expected) def test_mangled(self): - df = pd.DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) + df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) - expected = pd.DataFrame( + expected = DataFrame( {"b": [0, 0], "c": [1, 1]}, index=pd.Index([0, 1], name="A") ) tm.assert_frame_equal(result, expected) @@ -773,9 +771,9 @@ def test_agg_relabel_multiindex_duplicates(): @pytest.mark.parametrize("kwargs", [{"c": ["min"]}, {"b": [], "c": ["min"]}]) def test_groupby_aggregate_empty_key(kwargs): # GH: 32580 - df = pd.DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) result = df.groupby("a").agg(kwargs) - expected = pd.DataFrame( + expected = DataFrame( [1, 4], index=pd.Index([1, 2], dtype="int64", name="a"), columns=pd.MultiIndex.from_tuples([["c", "min"]]), @@ -785,9 +783,9 @@ def test_groupby_aggregate_empty_key(kwargs): def test_groupby_aggregate_empty_key_empty_return(): # GH: 32580 Check if everything works, when return is empty - df = pd.DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) result = df.groupby("a").agg({"b": []}) - expected = pd.DataFrame(columns=pd.MultiIndex(levels=[["b"], []], codes=[[], []])) + expected = DataFrame(columns=pd.MultiIndex(levels=[["b"], []], codes=[[], []])) tm.assert_frame_equal(result, expected) @@ -795,13 +793,13 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel(): # GH 32240: When the aggregate function relabels column names and # as_index=False is specified, the results are dropped. - df = pd.DataFrame( + df = DataFrame( {"key": ["x", "y", "z", "x", "y", "z"], "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]} ) grouped = df.groupby("key", as_index=False) result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) - expected = pd.DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]}) + expected = DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]}) tm.assert_frame_equal(result, expected) @@ -810,7 +808,7 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex(): # as_index=False is specified, the results are dropped. Check if # multiindex is returned in the right order - df = pd.DataFrame( + df = DataFrame( { "key": ["x", "y", "x", "y", "x", "x"], "key1": ["a", "b", "c", "b", "a", "c"], @@ -820,7 +818,7 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex(): grouped = df.groupby(["key", "key1"], as_index=False) result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) - expected = pd.DataFrame( + expected = DataFrame( {"key": ["x", "x", "y"], "key1": ["a", "c", "b"], "min_val": [1.0, 0.75, 0.8]} ) tm.assert_frame_equal(result, expected) @@ -832,10 +830,10 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex(): def test_multiindex_custom_func(func): # GH 31777 data = [[1, 4, 2], [5, 7, 1]] - df = pd.DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) + df = DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) result = df.groupby(np.array([0, 1])).agg(func) expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}} - expected = pd.DataFrame(expected_dict) + expected = DataFrame(expected_dict) tm.assert_frame_equal(result, expected) @@ -868,13 +866,13 @@ def test_lambda_named_agg(func): def test_aggregate_mixed_types(): # GH 16916 - df = pd.DataFrame( + df = DataFrame( data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc") ) df["grouping"] = ["group 1", "group 1", 2] result = df.groupby("grouping").aggregate(lambda x: x.tolist()) expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]] - expected = pd.DataFrame( + expected = DataFrame( expected_data, index=Index([2, "group 1"], dtype="object", name="grouping"), columns=Index(["X", "Y", "Z"], dtype="object"), @@ -897,9 +895,9 @@ def aggfunc(x): else: return pd.NA - df = pd.DataFrame({"A": pd.array([1, 2, 3])}) + df = DataFrame({"A": pd.array([1, 2, 3])}) result = df.groupby([1, 1, 2]).agg(aggfunc) - expected = pd.DataFrame({"A": pd.array([1, pd.NA], dtype="Int64")}, index=[1, 2]) + expected = DataFrame({"A": pd.array([1, pd.NA], dtype="Int64")}, index=[1, 2]) tm.assert_frame_equal(result, expected) @@ -908,7 +906,7 @@ def test_groupby_aggregate_period_column(func): # GH 31471 groups = [1, 2] periods = pd.period_range("2020", periods=2, freq="Y") - df = pd.DataFrame({"a": groups, "b": periods}) + df = DataFrame({"a": groups, "b": periods}) result = getattr(df.groupby("a")["b"], func)() idx = pd.Int64Index([1, 2], name="a") @@ -922,21 +920,21 @@ def test_groupby_aggregate_period_frame(func): # GH 31471 groups = [1, 2] periods = pd.period_range("2020", periods=2, freq="Y") - df = pd.DataFrame({"a": groups, "b": periods}) + df = DataFrame({"a": groups, "b": periods}) result = getattr(df.groupby("a"), func)() idx = pd.Int64Index([1, 2], name="a") - expected = pd.DataFrame({"b": periods}, index=idx) + expected = DataFrame({"b": periods}, index=idx) tm.assert_frame_equal(result, expected) class TestLambdaMangling: def test_basic(self): - df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) - expected = pd.DataFrame( + expected = DataFrame( {("B", ""): [0, 0], ("B", ""): [1, 1]}, index=pd.Index([0, 1], name="A"), ) @@ -945,7 +943,7 @@ def test_basic(self): def test_mangle_series_groupby(self): gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) result = gr.agg([lambda x: 0, lambda x: 1]) - expected = pd.DataFrame({"": [0, 0], "": [1, 1]}) + expected = DataFrame({"": [0, 0], "": [1, 1]}) tm.assert_frame_equal(result, expected) @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.") @@ -953,16 +951,16 @@ def test_with_kwargs(self): f1 = lambda x, y, b=1: x.sum() + y + b f2 = lambda x, y, b=2: x.sum() + y * b result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0) - expected = pd.DataFrame({"": [4], "": [6]}) + expected = DataFrame({"": [4], "": [6]}) tm.assert_frame_equal(result, expected) result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) - expected = pd.DataFrame({"": [13], "": [30]}) + expected = DataFrame({"": [13], "": [30]}) tm.assert_frame_equal(result, expected) def test_agg_with_one_lambda(self): # GH 25719, write tests for DataFrameGroupby.agg with only one lambda - df = pd.DataFrame( + df = DataFrame( { "kind": ["cat", "dog", "cat", "dog"], "height": [9.1, 6.0, 9.5, 34.0], @@ -971,7 +969,7 @@ def test_agg_with_one_lambda(self): ) columns = ["height_sqr_min", "height_max", "weight_max"] - expected = pd.DataFrame( + expected = DataFrame( { "height_sqr_min": [82.81, 36.00], "height_max": [9.5, 34.0], @@ -1002,7 +1000,7 @@ def test_agg_with_one_lambda(self): def test_agg_multiple_lambda(self): # GH25719, test for DataFrameGroupby.agg with multiple lambdas # with mixed aggfunc - df = pd.DataFrame( + df = DataFrame( { "kind": ["cat", "dog", "cat", "dog"], "height": [9.1, 6.0, 9.5, 34.0], @@ -1016,7 +1014,7 @@ def test_agg_multiple_lambda(self): "height_max_2", "weight_min", ] - expected = pd.DataFrame( + expected = DataFrame( { "height_sqr_min": [82.81, 36.00], "height_max": [9.5, 34.0], @@ -1053,9 +1051,9 @@ def test_agg_multiple_lambda(self): def test_groupby_get_by_index(): # GH 33439 - df = pd.DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]}) + df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]}) res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])}) - expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A") + expected = DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A") pd.testing.assert_frame_equal(res, expected) @@ -1071,7 +1069,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): # test single aggregations on ordered categorical cols GHGH27800 # create the result dataframe - input_df = pd.DataFrame( + input_df = DataFrame( { "nr": [1, 2, 3, 4, 5, 6, 7, 8], "cat_ord": list("aabbccdd"), @@ -1088,7 +1086,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" ) - expected_df = pd.DataFrame(data=exp_data, index=cat_index) + expected_df = DataFrame(data=exp_data, index=cat_index) tm.assert_frame_equal(result_df, expected_df) @@ -1105,7 +1103,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): # test combined aggregations on ordered categorical cols GH27800 # create the result dataframe - input_df = pd.DataFrame( + input_df = DataFrame( { "nr": [1, 2, 3, 4, 5, 6, 7, 8], "cat_ord": list("aabbccdd"), @@ -1133,7 +1131,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): multi_index_list.append([k, v]) multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list)) - expected_df = pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index) + expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index) tm.assert_frame_equal(result_df, expected_df) @@ -1141,7 +1139,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): def test_nonagg_agg(): # GH 35490 - Single/Multiple agg of non-agg function give same results # TODO: agg should raise for functions that don't aggregate - df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) + df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) g = df.groupby("a") result = g.agg(["cumsum"]) @@ -1153,9 +1151,9 @@ def test_nonagg_agg(): def test_agg_no_suffix_index(): # GH36189 - df = pd.DataFrame([[4, 9]] * 3, columns=["A", "B"]) + df = DataFrame([[4, 9]] * 3, columns=["A", "B"]) result = df.agg(["sum", lambda x: x.sum(), lambda x: x.sum()]) - expected = pd.DataFrame( + expected = DataFrame( {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""] ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 87ebd8b5a27fb..e01855c1b7761 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -176,7 +176,7 @@ def test__cython_agg_general(op, targop): ], ) def test_cython_agg_empty_buckets(op, targop, observed): - df = pd.DataFrame([11, 12, 13]) + df = DataFrame([11, 12, 13]) grps = range(0, 55, 5) # calling _cython_agg_general directly, instead of via the user API @@ -192,14 +192,14 @@ def test_cython_agg_empty_buckets(op, targop, observed): def test_cython_agg_empty_buckets_nanops(observed): # GH-18869 can't call nanops on empty groups, so hardcode expected # for these - df = pd.DataFrame([11, 12, 13], columns=["a"]) + df = DataFrame([11, 12, 13], columns=["a"]) grps = range(0, 25, 5) # add / sum result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( "add" ) intervals = pd.interval_range(0, 20, freq=5) - expected = pd.DataFrame( + expected = DataFrame( {"a": [0, 0, 36, 0]}, index=pd.CategoricalIndex(intervals, name="a", ordered=True), ) @@ -212,7 +212,7 @@ def test_cython_agg_empty_buckets_nanops(observed): result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( "prod" ) - expected = pd.DataFrame( + expected = DataFrame( {"a": [1, 1, 1716, 1]}, index=pd.CategoricalIndex(intervals, name="a", ordered=True), ) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index a5f947cf656a0..15803d4b0ef94 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -143,7 +143,7 @@ def test_agg_cast_results_dtypes(): # xref #11444 u = [dt.datetime(2015, x + 1, 1) for x in range(12)] v = list("aaabbbbbbccd") - df = pd.DataFrame({"X": v, "Y": u}) + df = DataFrame({"X": v, "Y": u}) result = df.groupby("X")["Y"].agg(len) expected = df.groupby("X")["Y"].count() @@ -216,7 +216,7 @@ def test_aggregate_api_consistency(): def test_agg_dict_renaming_deprecation(): # 15931 - df = pd.DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)}) + df = DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)}) msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): @@ -414,7 +414,7 @@ def __call__(self, x): def test_agg_over_numpy_arrays(): # GH 3788 - df = pd.DataFrame( + df = DataFrame( [ [1, np.array([10, 20, 30])], [1, np.array([40, 50, 60])], @@ -427,9 +427,7 @@ def test_agg_over_numpy_arrays(): expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] expected_index = pd.Index([1, 2], name="category") expected_column = ["arraydata"] - expected = pd.DataFrame( - expected_data, index=expected_index, columns=expected_column - ) + expected = DataFrame(expected_data, index=expected_index, columns=expected_column) tm.assert_frame_equal(result, expected) @@ -438,7 +436,7 @@ def test_agg_tzaware_non_datetime_result(): # discussed in GH#29589, fixed in GH#29641, operating on tzaware values # with function that is not dtype-preserving dti = pd.date_range("2012-01-01", periods=4, tz="UTC") - df = pd.DataFrame({"a": [0, 0, 1, 1], "b": dti}) + df = DataFrame({"a": [0, 0, 1, 1], "b": dti}) gb = df.groupby("a") # Case that _does_ preserve the dtype @@ -462,9 +460,7 @@ def test_agg_tzaware_non_datetime_result(): def test_agg_timezone_round_trip(): # GH 15426 ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific") - df = pd.DataFrame( - {"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]} - ) + df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]}) result1 = df.groupby("a")["b"].agg(np.min).iloc[0] result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0] @@ -477,7 +473,7 @@ def test_agg_timezone_round_trip(): dates = [ pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5) ] - df = pd.DataFrame({"A": ["a", "b"] * 2, "B": dates}) + df = DataFrame({"A": ["a", "b"] * 2, "B": dates}) grouped = df.groupby("A") ts = df["B"].iloc[0] @@ -498,13 +494,13 @@ def test_agg_timezone_round_trip(): def test_sum_uint64_overflow(): # see gh-14758 # Convert to uint64 and don't overflow - df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) + df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) df = df + 9223372036854775807 index = pd.Index( [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64 ) - expected = pd.DataFrame( + expected = DataFrame( {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]}, index=index, ) @@ -517,20 +513,20 @@ def test_sum_uint64_overflow(): @pytest.mark.parametrize( "structure, expected", [ - (tuple, pd.DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), - (list, pd.DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), ( lambda x: tuple(x), - pd.DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}), + DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}), ), ( lambda x: list(x), - pd.DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}), + DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}), ), ], ) def test_agg_structs_dataframe(structure, expected): - df = pd.DataFrame( + df = DataFrame( {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} ) @@ -550,7 +546,7 @@ def test_agg_structs_dataframe(structure, expected): ) def test_agg_structs_series(structure, expected): # Issue #18079 - df = pd.DataFrame( + df = DataFrame( {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} ) @@ -561,7 +557,7 @@ def test_agg_structs_series(structure, expected): def test_agg_category_nansum(observed): categories = ["a", "b", "c"] - df = pd.DataFrame( + df = DataFrame( {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]} ) result = df.groupby("A", observed=observed).B.agg(np.nansum) @@ -577,12 +573,10 @@ def test_agg_category_nansum(observed): def test_agg_list_like_func(): # GH 18473 - df = pd.DataFrame( - {"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]} - ) + df = DataFrame({"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]}) grouped = df.groupby("A", as_index=False, sort=False) result = grouped.agg({"B": lambda x: list(x)}) - expected = pd.DataFrame( + expected = DataFrame( {"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]} ) tm.assert_frame_equal(result, expected) @@ -590,7 +584,7 @@ def test_agg_list_like_func(): def test_agg_lambda_with_timezone(): # GH 23683 - df = pd.DataFrame( + df = DataFrame( { "tag": [1, 1], "date": [ @@ -600,7 +594,7 @@ def test_agg_lambda_with_timezone(): } ) result = df.groupby("tag").agg({"date": lambda e: e.head(1)}) - expected = pd.DataFrame( + expected = DataFrame( [pd.Timestamp("2018-01-01", tz="UTC")], index=pd.Index([1], name="tag"), columns=["date"], @@ -629,7 +623,7 @@ def test_groupby_agg_err_catching(err_cls): from pandas.tests.extension.decimal.array import DecimalArray, make_data, to_decimal data = make_data()[:5] - df = pd.DataFrame( + df = DataFrame( {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} ) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index feb758c82285d..ab44bd17d3f15 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -51,7 +51,7 @@ def test_apply_issues(): def test_apply_trivial(): # GH 20066 # trivial apply: ignore input and return a constant dataframe. - df = pd.DataFrame( + df = DataFrame( {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=["key", "data"], ) @@ -65,7 +65,7 @@ def test_apply_trivial(): def test_apply_trivial_fail(): # GH 20066 - df = pd.DataFrame( + df = DataFrame( {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=["key", "data"], ) @@ -189,7 +189,7 @@ def test_group_apply_once_per_group2(capsys): expected = 2 # Number of times `apply` should call a function for the current test - df = pd.DataFrame( + df = DataFrame( { "group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"], @@ -241,7 +241,7 @@ def test_groupby_apply_identity_maybecopy_index_identical(func): # have an impact on the index structure of the result since this is not # transparent to the user - df = pd.DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + df = DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) result = df.groupby("g").apply(func) tm.assert_frame_equal(result, df) @@ -538,7 +538,7 @@ def filt2(x): @pytest.mark.parametrize("test_series", [True, False]) def test_apply_with_duplicated_non_sorted_axis(test_series): # GH 30667 - df = pd.DataFrame( + df = DataFrame( [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2] ) if test_series: @@ -565,9 +565,7 @@ def test_apply_reindex_values(): # solved in #30679 values = [1, 2, 3, 4] indices = [1, 1, 2, 2] - df = pd.DataFrame( - {"group": ["Group1", "Group2"] * 2, "value": values}, index=indices - ) + df = DataFrame({"group": ["Group1", "Group2"] * 2, "value": values}, index=indices) expected = Series(values, index=indices, name="value") def reindex_helper(x): @@ -608,7 +606,7 @@ def test_apply_numeric_coercion_when_datetime(): # for which are here. # GH 15670 - df = pd.DataFrame( + df = DataFrame( {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]} ) expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) @@ -617,7 +615,7 @@ def test_apply_numeric_coercion_when_datetime(): tm.assert_series_equal(result["Str"], expected["Str"]) # GH 15421 - df = pd.DataFrame( + df = DataFrame( {"A": [10, 20, 30], "B": ["foo", "3", "4"], "T": [pd.Timestamp("12:31:22")] * 3} ) @@ -639,7 +637,7 @@ def predictions(tool): out["useTime"] = str(tool[tool.State == "step2"].oTime.values[0]) return out - df1 = pd.DataFrame( + df1 = DataFrame( { "Key": ["B", "B", "A", "A"], "State": ["step1", "step2", "step1", "step2"], @@ -658,7 +656,7 @@ def test_apply_aggregating_timedelta_and_datetime(): # Regression test for GH 15562 # The following groupby caused ValueErrors and IndexErrors pre 0.20.0 - df = pd.DataFrame( + df = DataFrame( { "clientid": ["A", "B", "C"], "datetime": [np.datetime64("2017-02-01 00:00:00")] * 3, @@ -670,7 +668,7 @@ def test_apply_aggregating_timedelta_and_datetime(): dict(clientid_age=ddf.time_delta_zero.min(), date=ddf.datetime.min()) ) ) - expected = pd.DataFrame( + expected = DataFrame( { "clientid": ["A", "B", "C"], "clientid_age": [np.timedelta64(0, "D")] * 3, @@ -686,13 +684,13 @@ def test_apply_groupby_datetimeindex(): # groupby apply failed on dataframe with DatetimeIndex data = [["A", 10], ["B", 20], ["B", 30], ["C", 40], ["C", 50]] - df = pd.DataFrame( + df = DataFrame( data, columns=["Name", "Value"], index=pd.date_range("2020-09-01", "2020-09-05") ) result = df.groupby("Name").sum() - expected = pd.DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]}) + expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]}) expected.set_index("Name", inplace=True) tm.assert_frame_equal(result, expected) @@ -704,7 +702,7 @@ def test_time_field_bug(): # that were not returned by the apply function, an exception would be # raised. - df = pd.DataFrame({"a": 1, "b": [datetime.now() for nn in range(10)]}) + df = DataFrame({"a": 1, "b": [datetime.now() for nn in range(10)]}) def func_with_no_date(batch): return Series({"c": 2}) @@ -713,13 +711,11 @@ def func_with_date(batch): return Series({"b": datetime(2015, 1, 1), "c": 2}) dfg_no_conversion = df.groupby(by=["a"]).apply(func_with_no_date) - dfg_no_conversion_expected = pd.DataFrame({"c": 2}, index=[1]) + dfg_no_conversion_expected = DataFrame({"c": 2}, index=[1]) dfg_no_conversion_expected.index.name = "a" dfg_conversion = df.groupby(by=["a"]).apply(func_with_date) - dfg_conversion_expected = pd.DataFrame( - {"b": datetime(2015, 1, 1), "c": 2}, index=[1] - ) + dfg_conversion_expected = DataFrame({"b": datetime(2015, 1, 1), "c": 2}, index=[1]) dfg_conversion_expected.index.name = "a" tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) @@ -788,7 +784,7 @@ def test_func(x): def test_groupby_apply_return_empty_chunk(): # GH 22221: apply filter which returns some empty groups - df = pd.DataFrame(dict(value=[0, 1], group=["filled", "empty"])) + df = DataFrame(dict(value=[0, 1], group=["filled", "empty"])) groups = df.groupby("group") result = groups.apply(lambda group: group[group.value != 1]["value"]) expected = Series( @@ -803,11 +799,11 @@ def test_groupby_apply_return_empty_chunk(): def test_apply_with_mixed_types(): # gh-20949 - df = pd.DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) + df = DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) g = df.groupby("A") result = g.transform(lambda x: x / x.sum()) - expected = pd.DataFrame({"B": [1 / 3.0, 2 / 3.0, 1], "C": [0.4, 0.6, 1.0]}) + expected = DataFrame({"B": [1 / 3.0, 2 / 3.0, 1], "C": [0.4, 0.6, 1.0]}) tm.assert_frame_equal(result, expected) result = g.apply(lambda x: x / x.sum()) @@ -835,10 +831,10 @@ def test_apply_datetime_issue(group_column_dtlike): # is a datetime object and the column labels are different from # standard int values in range(len(num_columns)) - df = pd.DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) + df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42])) - expected = pd.DataFrame( + expected = DataFrame( ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] ) tm.assert_frame_equal(result, expected) @@ -891,11 +887,11 @@ def test_apply_multi_level_name(category): expected_index = pd.CategoricalIndex([1, 2], categories=[1, 2, 3], name="B") else: expected_index = pd.Index([1, 2], name="B") - df = pd.DataFrame( + df = DataFrame( {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))} ).set_index(["A", "B"]) result = df.groupby("B").apply(lambda x: x.sum()) - expected = pd.DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index) + expected = DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index) tm.assert_frame_equal(result, expected) assert df.index.names == ["A", "B"] @@ -953,7 +949,7 @@ def test_apply_index_has_complex_internals(index): ) def test_apply_function_returns_non_pandas_non_scalar(function, expected_values): # GH 31441 - df = pd.DataFrame(["A", "A", "B", "B"], columns=["groups"]) + df = DataFrame(["A", "A", "B", "B"], columns=["groups"]) result = df.groupby("groups").apply(function) expected = Series(expected_values, index=pd.Index(["A", "B"], name="groups")) tm.assert_series_equal(result, expected) @@ -964,7 +960,7 @@ def test_apply_function_returns_numpy_array(): def fct(group): return group["B"].values.flatten() - df = pd.DataFrame({"A": ["a", "a", "b", "none"], "B": [1, 2, 3, np.nan]}) + df = DataFrame({"A": ["a", "a", "b", "none"], "B": [1, 2, 3, np.nan]}) result = df.groupby("A").apply(fct) expected = Series( @@ -976,7 +972,7 @@ def fct(group): @pytest.mark.parametrize("function", [lambda gr: gr.index, lambda gr: gr.index + 1 - 1]) def test_apply_function_index_return(function): # GH: 22541 - df = pd.DataFrame([1, 2, 2, 2, 1, 2, 3, 1, 3, 1], columns=["id"]) + df = DataFrame([1, 2, 2, 2, 1, 2, 3, 1, 3, 1], columns=["id"]) result = df.groupby("id").apply(function) expected = Series( [pd.Index([0, 4, 7, 9]), pd.Index([1, 2, 3, 5]), pd.Index([6, 8])], @@ -987,9 +983,7 @@ def test_apply_function_index_return(function): def test_apply_function_with_indexing(): # GH: 33058 - df = pd.DataFrame( - {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]} - ) + df = DataFrame({"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}) def fn(x): x.col2[x.index[-1]] = 0 @@ -1026,8 +1020,8 @@ def test_apply_with_timezones_aware(): dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 index_no_tz = pd.DatetimeIndex(dates) index_tz = pd.DatetimeIndex(dates, tz="UTC") - df1 = pd.DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_no_tz}) - df2 = pd.DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_tz}) + df1 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_no_tz}) + df2 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_tz}) result1 = df1.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy()) result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy()) @@ -1046,7 +1040,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): } ) - expected = pd.DataFrame( + expected = DataFrame( {"a": [264, 297], "b": [15, 6], "c": [150, 60]}, index=pd.Index([88, 99], name="a"), ) @@ -1067,7 +1061,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): # GH 29617 - df = pd.DataFrame( + df = DataFrame( { "A": ["a", "a", "a", "b"], "B": [ @@ -1100,7 +1094,7 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): # should give the same result. There was previously a bug where the # by_rows operation would work fine, but by_cols would throw a ValueError - df = pd.DataFrame( + df = DataFrame( np.random.random([6, 4]), columns=pd.MultiIndex.from_product([["A", "B"], [1, 2]]), ) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index ab211845c1957..9785a95f3b6cb 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -471,7 +471,7 @@ def test_observed_nth(): # GH 26385 cat = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"]) ser = Series([1, 2, 3]) - df = pd.DataFrame({"cat": cat, "ser": ser}) + df = DataFrame({"cat": cat, "ser": ser}) result = df.groupby("cat", observed=False)["ser"].nth(0) @@ -768,10 +768,10 @@ def test_preserve_on_ordered_ops(func, values): # gh-18502 # preserve the categoricals on ops c = pd.Categorical(["first", "second", "third", "fourth"], ordered=True) - df = pd.DataFrame({"payload": [-1, -2, -1, -2], "col": c}) + df = DataFrame({"payload": [-1, -2, -1, -2], "col": c}) g = df.groupby("payload") result = getattr(g, func)() - expected = pd.DataFrame( + expected = DataFrame( {"payload": [-2, -1], "col": Series(values, dtype=c.dtype)} ).set_index("payload") tm.assert_frame_equal(result, expected) @@ -818,9 +818,7 @@ def test_groupby_empty_with_category(): # GH-9614 # test fix for when group by on None resulted in # coercion of dtype categorical -> float - df = pd.DataFrame( - {"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])} - ) + df = DataFrame({"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])}) result = df.groupby("A").first()["B"] expected = Series( pd.Categorical([], categories=["test", "train"]), @@ -1280,10 +1278,10 @@ def test_groupby_cat_preserves_structure(observed, ordered): def test_get_nonexistent_category(): # Accessing a Category that is not in the dataframe - df = pd.DataFrame({"var": ["a", "a", "b", "b"], "val": range(4)}) + df = DataFrame({"var": ["a", "a", "b", "b"], "val": range(4)}) with pytest.raises(KeyError, match="'vau'"): df.groupby("var").apply( - lambda rows: pd.DataFrame( + lambda rows: DataFrame( {"var": [rows.iloc[-1]["var"]], "val": [rows.iloc[-1]["vau"]]} ) ) @@ -1300,7 +1298,7 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, r ) request.node.add_marker(mark) - df = pd.DataFrame( + df = DataFrame( { "cat_1": pd.Categorical(list("AABB"), categories=list("ABCD")), "cat_2": pd.Categorical(list("AB") * 2, categories=list("ABCD")), @@ -1333,7 +1331,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( ) request.node.add_marker(mark) - df = pd.DataFrame( + df = DataFrame( { "cat_1": pd.Categorical(list("AABB"), categories=list("ABC")), "cat_2": pd.Categorical(list("AB") * 2, categories=list("ABC")), @@ -1369,7 +1367,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun if reduction_func == "ngroup": pytest.skip("ngroup does not return the Categories on the index") - df = pd.DataFrame( + df = DataFrame( { "cat_1": pd.Categorical(list("AABB"), categories=list("ABC")), "cat_2": pd.Categorical(list("1111"), categories=list("12")), @@ -1399,7 +1397,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( if reduction_func == "ngroup": pytest.skip("ngroup does not return the Categories on the index") - df = pd.DataFrame( + df = DataFrame( { "cat_1": pd.Categorical(list("AABB"), categories=list("ABC")), "cat_2": pd.Categorical(list("1111"), categories=list("12")), @@ -1424,7 +1422,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( def test_series_groupby_categorical_aggregation_getitem(): # GH 8870 d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} - df = pd.DataFrame(d) + df = DataFrame(d) cat = pd.cut(df["foo"], np.linspace(0, 20, 5)) df["range"] = cat groups = df.groupby(["range", "baz"], as_index=True, sort=True) @@ -1439,7 +1437,7 @@ def test_series_groupby_categorical_aggregation_getitem(): ) def test_groupby_agg_categorical_columns(func, expected_values): # 31256 - df = pd.DataFrame( + df = DataFrame( { "id": [0, 1, 2, 3, 4], "groups": [0, 1, 1, 2, 2], @@ -1448,17 +1446,15 @@ def test_groupby_agg_categorical_columns(func, expected_values): ).set_index("id") result = df.groupby("groups").agg(func) - expected = pd.DataFrame( + expected = DataFrame( {"value": expected_values}, index=pd.Index([0, 1, 2], name="groups") ) tm.assert_frame_equal(result, expected) def test_groupby_agg_non_numeric(): - df = pd.DataFrame( - {"A": pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"])} - ) - expected = pd.DataFrame({"A": [2, 1]}, index=[1, 2]) + df = DataFrame({"A": pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"])}) + expected = DataFrame({"A": [2, 1]}, index=[1, 2]) result = df.groupby([1, 2, 1]).agg(pd.Series.nunique) tm.assert_frame_equal(result, expected) @@ -1471,9 +1467,7 @@ def test_groupby_agg_non_numeric(): def test_groupy_first_returned_categorical_instead_of_dataframe(func): # GH 28641: groupby drops index, when grouping over categorical column with # first/last. Renamed Categorical instead of DataFrame previously. - df = pd.DataFrame( - {"A": [1997], "B": Series(["b"], dtype="category").cat.as_ordered()} - ) + df = DataFrame({"A": [1997], "B": Series(["b"], dtype="category").cat.as_ordered()}) df_grouped = df.groupby("A")["B"] result = getattr(df_grouped, func)() expected = Series(["b"], index=pd.Index([1997], name="A"), name="B") @@ -1494,7 +1488,7 @@ def test_read_only_category_no_sort(): def test_sorted_missing_category_values(): # GH 28597 - df = pd.DataFrame( + df = DataFrame( { "foo": [ "small", @@ -1515,7 +1509,7 @@ def test_sorted_missing_category_values(): .cat.set_categories(["tiny", "small", "medium", "large"], ordered=True) ) - expected = pd.DataFrame( + expected = DataFrame( { "tiny": {"A": 0, "C": 0}, "small": {"A": 0, "C": 1}, @@ -1539,7 +1533,7 @@ def test_sorted_missing_category_values(): def test_agg_cython_category_not_implemented_fallback(): # https://github.com/pandas-dev/pandas/issues/31450 - df = pd.DataFrame({"col_num": [1, 1, 2, 3]}) + df = DataFrame({"col_num": [1, 1, 2, 3]}) df["col_cat"] = df["col_num"].astype("category") result = df.groupby("col_num").col_cat.first() @@ -1557,15 +1551,15 @@ def test_agg_cython_category_not_implemented_fallback(): def test_aggregate_categorical_lost_index(func: str): # GH: 28641 groupby drops index, when grouping over categorical column with min/max ds = Series(["b"], dtype="category").cat.as_ordered() - df = pd.DataFrame({"A": [1997], "B": ds}) + df = DataFrame({"A": [1997], "B": ds}) result = df.groupby("A").agg({"B": func}) - expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) + expected = DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) tm.assert_frame_equal(result, expected) def test_aggregate_categorical_with_isnan(): # GH 29837 - df = pd.DataFrame( + df = DataFrame( { "A": [1, 1, 1, 1], "B": [1, 2, 1, 2], @@ -1579,7 +1573,7 @@ def test_aggregate_categorical_with_isnan(): result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum()) index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) - expected = pd.DataFrame( + expected = DataFrame( data={ "numerical_col": [1.0, 0.0], "object_col": [0, 0], @@ -1592,7 +1586,7 @@ def test_aggregate_categorical_with_isnan(): def test_categorical_transform(): # GH 29037 - df = pd.DataFrame( + df = DataFrame( { "package_id": [1, 1, 1, 2, 2, 3], "status": [ @@ -1613,7 +1607,7 @@ def test_categorical_transform(): df["last_status"] = df.groupby("package_id")["status"].transform(max) result = df.copy() - expected = pd.DataFrame( + expected = DataFrame( { "package_id": [1, 1, 1, 2, 2, 3], "status": [ @@ -1647,7 +1641,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( # GH 34951 cat = pd.Categorical([0, 0, 1, 1]) val = [0, 1, 1, 0] - df = pd.DataFrame({"a": cat, "b": cat, "c": val}) + df = DataFrame({"a": cat, "b": cat, "c": val}) idx = pd.Categorical([0, 1]) idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"]) @@ -1672,7 +1666,7 @@ def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals( # GH 34951 cat = pd.Categorical([0, 0, 1, 1]) val = [0, 1, 1, 0] - df = pd.DataFrame({"a": cat, "b": cat, "c": val}) + df = DataFrame({"a": cat, "b": cat, "c": val}) idx = pd.Categorical([0, 1]) idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"]) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index a5842dee2c43e..c03ed00e1a081 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -283,7 +283,7 @@ def test_count(): def test_count_non_nulls(): # GH#5610 # count counts non-nulls - df = pd.DataFrame( + df = DataFrame( [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]], columns=["A", "B", "C"], ) @@ -301,12 +301,12 @@ def test_count_non_nulls(): def test_count_object(): - df = pd.DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) result = df.groupby("c").a.count() expected = Series([3, 3], index=pd.Index([2, 3], name="c"), name="a") tm.assert_series_equal(result, expected) - df = pd.DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) result = df.groupby("c").a.count() expected = Series([1, 3], index=pd.Index([2, 3], name="c"), name="a") tm.assert_series_equal(result, expected) @@ -318,7 +318,7 @@ def test_count_cross_type(): (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2))) ) - df = pd.DataFrame(vals, columns=["a", "b", "c", "d"]) + df = DataFrame(vals, columns=["a", "b", "c", "d"]) df[df == 2] = np.nan expected = df.groupby(["c", "d"]).count() diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index ad2e61ad99389..448e6c6e6f64a 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -26,9 +26,9 @@ def test_filter_series(): def test_filter_single_column_df(): - df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7]) - expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) - expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5]) + df = DataFrame([1, 3, 20, 5, 22, 24, 7]) + expected_odd = DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = DataFrame([20, 22, 24], index=[2, 4, 5]) grouper = df[0].apply(lambda x: x % 2) grouped = df.groupby(grouper) tm.assert_frame_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) @@ -45,20 +45,20 @@ def test_filter_single_column_df(): def test_filter_multi_column_df(): - df = pd.DataFrame({"A": [1, 12, 12, 1], "B": [1, 1, 1, 1]}) + df = DataFrame({"A": [1, 12, 12, 1], "B": [1, 1, 1, 1]}) grouper = df["A"].apply(lambda x: x % 2) grouped = df.groupby(grouper) - expected = pd.DataFrame({"A": [12, 12], "B": [1, 1]}, index=[1, 2]) + expected = DataFrame({"A": [12, 12], "B": [1, 1]}, index=[1, 2]) tm.assert_frame_equal( grouped.filter(lambda x: x["A"].sum() - x["B"].sum() > 10), expected ) def test_filter_mixed_df(): - df = pd.DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) grouper = df["A"].apply(lambda x: x % 2) grouped = df.groupby(grouper) - expected = pd.DataFrame({"A": [12, 12], "B": ["b", "c"]}, index=[1, 2]) + expected = DataFrame({"A": [12, 12], "B": ["b", "c"]}, index=[1, 2]) tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 10), expected) @@ -67,7 +67,7 @@ def test_filter_out_all_groups(): grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) - df = pd.DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) grouper = df["A"].apply(lambda x: x % 2) grouped = df.groupby(grouper) tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 1000), df.loc[[]]) @@ -79,7 +79,7 @@ def test_filter_out_no_groups(): grouped = s.groupby(grouper) filtered = grouped.filter(lambda x: x.mean() > 0) tm.assert_series_equal(filtered, s) - df = pd.DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) grouper = df["A"].apply(lambda x: x % 2) grouped = df.groupby(grouper) filtered = grouped.filter(lambda x: x["A"].mean() > 0) @@ -88,16 +88,16 @@ def test_filter_out_no_groups(): def test_filter_out_all_groups_in_df(): # GH12768 - df = pd.DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) res = df.groupby("a") res = res.filter(lambda x: x["b"].sum() > 5, dropna=False) - expected = pd.DataFrame({"a": [np.nan] * 3, "b": [np.nan] * 3}) + expected = DataFrame({"a": [np.nan] * 3, "b": [np.nan] * 3}) tm.assert_frame_equal(expected, res) - df = pd.DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) res = df.groupby("a") res = res.filter(lambda x: x["b"].sum() > 5, dropna=True) - expected = pd.DataFrame({"a": [], "b": []}, dtype="int64") + expected = DataFrame({"a": [], "b": []}, dtype="int64") tm.assert_frame_equal(expected, res) @@ -119,7 +119,7 @@ def raise_if_sum_is_zero(x): def test_filter_with_axis_in_groupby(): # issue 11041 index = pd.MultiIndex.from_product([range(10), [0, 1]]) - data = pd.DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") + data = DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10) expected = data.iloc[:, 12:20] tm.assert_frame_equal(result, expected) @@ -551,7 +551,7 @@ def test_filter_has_access_to_grouped_cols(): def test_filter_enforces_scalarness(): - df = pd.DataFrame( + df = DataFrame( [ ["best", "a", "x"], ["worst", "b", "y"], @@ -568,7 +568,7 @@ def test_filter_enforces_scalarness(): def test_filter_non_bool_raises(): - df = pd.DataFrame( + df = DataFrame( [ ["best", "a", 1], ["worst", "b", 1], diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 7a309db143758..6d760035246c7 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -90,7 +90,7 @@ def test_min_date_with_nans(): dates = pd.to_datetime( Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d" ).dt.date - df = pd.DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates}) + df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates}) result = df.groupby("b", as_index=False)["c"].min()["c"] expected = pd.to_datetime( @@ -122,7 +122,7 @@ def test_intercept_builtin_sum(): @pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key def test_builtins_apply(keys, f): # see gh-8155 - df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"]) + df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"]) df["jolie"] = np.random.randn(1000) fname = f.__name__ @@ -151,7 +151,7 @@ def test_arg_passthru(): # GH3668 # GH5724 - df = pd.DataFrame( + df = DataFrame( { "group": [1, 1, 2], "int": [1, 2, 3], @@ -179,7 +179,7 @@ def test_arg_passthru(): expected_columns_numeric = Index(["int", "float", "category_int"]) # mean / median - expected = pd.DataFrame( + expected = DataFrame( { "category_int": [7.5, 9], "float": [4.5, 6.0], @@ -308,7 +308,7 @@ def test_non_cython_api(): levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], codes=[[0] * 8, list(range(8))], ) - expected = pd.DataFrame( + expected = DataFrame( [ [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], @@ -385,7 +385,7 @@ def test_cython_median(): def test_median_empty_bins(observed): - df = pd.DataFrame(np.random.randint(0, 44, 500)) + df = DataFrame(np.random.randint(0, 44, 500)) grps = range(0, 55, 5) bins = pd.cut(df[0], grps) @@ -411,7 +411,7 @@ def test_median_empty_bins(observed): ) def test_groupby_non_arithmetic_agg_types(dtype, method, data): # GH9311, GH6620 - df = pd.DataFrame( + df = DataFrame( [{"a": 1, "b": 1}, {"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 2, "b": 4}] ) @@ -426,7 +426,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data): out_type = dtype exp = data["df"] - df_out = pd.DataFrame(exp) + df_out = DataFrame(exp) df_out["b"] = df_out.b.astype(out_type) df_out.set_index("a", inplace=True) @@ -448,7 +448,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data): ) def test_groupby_non_arithmetic_agg_int_like_precision(i): # see gh-6620, gh-9311 - df = pd.DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) + df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) grp_exp = { "first": {"expected": i[0]}, @@ -478,7 +478,7 @@ def test_groupby_non_arithmetic_agg_int_like_precision(i): ) def test_idxmin_idxmax_returns_int_types(func, values): # GH 25444 - df = pd.DataFrame( + df = DataFrame( { "name": ["A", "A", "B", "B"], "c_int": [1, 2, 3, 4], @@ -490,21 +490,21 @@ def test_idxmin_idxmax_returns_int_types(func, values): result = getattr(df.groupby("name"), func)() - expected = pd.DataFrame(values, index=Index(["A", "B"], name="name")) + expected = DataFrame(values, index=Index(["A", "B"], name="name")) tm.assert_frame_equal(result, expected) def test_groupby_cumprod(): # GH 4095 - df = pd.DataFrame({"key": ["b"] * 10, "value": 2}) + df = DataFrame({"key": ["b"] * 10, "value": 2}) actual = df.groupby("key")["value"].cumprod() expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) expected.name = "value" tm.assert_series_equal(actual, expected) - df = pd.DataFrame({"key": ["b"] * 100, "value": 2}) + df = DataFrame({"key": ["b"] * 100, "value": 2}) actual = df.groupby("key")["value"].cumprod() # if overflows, groupby product casts to float # while numpy passes back invalid values @@ -648,7 +648,7 @@ def test_nsmallest(): @pytest.mark.parametrize("func", ["cumprod", "cumsum"]) def test_numpy_compat(func): # see gh-12811 - df = pd.DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) g = df.groupby("A") msg = "numpy operations are not valid with groupby" @@ -664,14 +664,12 @@ def test_cummin(numpy_dtypes_for_minmax): min_val = numpy_dtypes_for_minmax[1] # GH 15048 - base_df = pd.DataFrame( - {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} - ) + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] df = base_df.astype(dtype) - expected = pd.DataFrame({"B": expected_mins}).astype(dtype) + expected = DataFrame({"B": expected_mins}).astype(dtype) result = df.groupby("A").cummin() tm.assert_frame_equal(result, expected) result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() @@ -687,30 +685,30 @@ def test_cummin(numpy_dtypes_for_minmax): # Test nan in some values base_df.loc[[0, 2, 4, 6], "B"] = np.nan - expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) + expected = DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) result = base_df.groupby("A").cummin() tm.assert_frame_equal(result, expected) expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(result, expected) # GH 15561 - df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) + df = DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) expected = Series(pd.to_datetime("2001"), index=[0], name="b") result = df.groupby("a")["b"].cummin() tm.assert_series_equal(expected, result) # GH 15635 - df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + df = DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) result = df.groupby("a").b.cummin() expected = Series([1, 2, 1], name="b") tm.assert_series_equal(result, expected) def test_cummin_all_nan_column(): - base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) - expected = pd.DataFrame({"B": [np.nan] * 8}) + expected = DataFrame({"B": [np.nan] * 8}) result = base_df.groupby("A").cummin() tm.assert_frame_equal(expected, result) result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() @@ -722,14 +720,12 @@ def test_cummax(numpy_dtypes_for_minmax): max_val = numpy_dtypes_for_minmax[2] # GH 15048 - base_df = pd.DataFrame( - {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} - ) + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] df = base_df.astype(dtype) - expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) + expected = DataFrame({"B": expected_maxs}).astype(dtype) result = df.groupby("A").cummax() tm.assert_frame_equal(result, expected) result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() @@ -745,30 +741,30 @@ def test_cummax(numpy_dtypes_for_minmax): # Test nan in some values base_df.loc[[0, 2, 4, 6], "B"] = np.nan - expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) + expected = DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) result = base_df.groupby("A").cummax() tm.assert_frame_equal(result, expected) expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(result, expected) # GH 15561 - df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) + df = DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) expected = Series(pd.to_datetime("2001"), index=[0], name="b") result = df.groupby("a")["b"].cummax() tm.assert_series_equal(expected, result) # GH 15635 - df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) + df = DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) result = df.groupby("a").b.cummax() expected = Series([2, 1, 2], name="b") tm.assert_series_equal(result, expected) def test_cummax_all_nan_column(): - base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) - expected = pd.DataFrame({"B": [np.nan] * 8}) + expected = DataFrame({"B": [np.nan] * 8}) result = base_df.groupby("A").cummax() tm.assert_frame_equal(expected, result) result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() @@ -800,7 +796,7 @@ def test_is_monotonic_increasing(in_vals, out_vals): "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], "C": in_vals, } - df = pd.DataFrame(source_dict) + df = DataFrame(source_dict) result = df.groupby("B").C.is_monotonic_increasing index = Index(list("abcd"), name="B") expected = Series(index=index, data=out_vals, name="C") @@ -837,7 +833,7 @@ def test_is_monotonic_decreasing(in_vals, out_vals): "C": in_vals, } - df = pd.DataFrame(source_dict) + df = DataFrame(source_dict) result = df.groupby("B").C.is_monotonic_decreasing index = Index(list("abcd"), name="B") expected = Series(index=index, data=out_vals, name="C") @@ -887,7 +883,7 @@ def test_frame_describe_multikey(tsframe): levels=[[col], group.columns], codes=[[0] * len(group.columns), range(len(group.columns))], ) - group = pd.DataFrame(group.values, columns=group_col, index=group.index) + group = DataFrame(group.values, columns=group_col, index=group.index) desc_groups.append(group) expected = pd.concat(desc_groups, axis=1) tm.assert_frame_equal(result, expected) @@ -929,13 +925,13 @@ def test_frame_describe_unstacked_format(): pd.Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, pd.Timestamp("2011-01-06 12:54:09", tz=None): 100000000, } - df = pd.DataFrame({"PRICE": prices, "VOLUME": volumes}) + df = DataFrame({"PRICE": prices, "VOLUME": volumes}) result = df.groupby("PRICE").VOLUME.describe() data = [ df[df.PRICE == 24990].VOLUME.describe().values.tolist(), df[df.PRICE == 25499].VOLUME.describe().values.tolist(), ] - expected = pd.DataFrame( + expected = DataFrame( data, index=pd.Index([24990, 25499], name="PRICE"), columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], @@ -951,7 +947,7 @@ def test_frame_describe_unstacked_format(): @pytest.mark.parametrize("as_index", [True, False]) def test_describe_with_duplicate_output_column_names(as_index): # GH 35314 - df = pd.DataFrame( + df = DataFrame( { "a": [99, 99, 99, 88, 88, 88], "b": [1, 2, 3, 4, 5, 6], @@ -1007,7 +1003,7 @@ def test_describe_with_duplicate_output_column_names(as_index): def test_groupby_mean_no_overflow(): # Regression test for (#22487) - df = pd.DataFrame( + df = DataFrame( { "user": ["A", "A", "A", "A", "A"], "connections": [4970, 4749, 4719, 4704, 18446744073699999744], @@ -1032,9 +1028,9 @@ def test_apply_to_nullable_integer_returns_float(values, function): output = 0.5 if function == "var" else 1.5 arr = np.array([output] * 3, dtype=float) idx = pd.Index([1, 2, 3], dtype=object, name="a") - expected = pd.DataFrame({"b": arr}, index=idx) + expected = DataFrame({"b": arr}, index=idx) - groups = pd.DataFrame(values, dtype="Int64").groupby("a") + groups = DataFrame(values, dtype="Int64").groupby("a") result = getattr(groups, function)() tm.assert_frame_equal(result, expected) @@ -1049,7 +1045,7 @@ def test_apply_to_nullable_integer_returns_float(values, function): def test_groupby_sum_below_mincount_nullable_integer(): # https://github.com/pandas-dev/pandas/issues/32861 - df = pd.DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64") + df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64") grouped = df.groupby("a") idx = pd.Index([0, 1, 2], dtype=object, name="a") @@ -1058,7 +1054,5 @@ def test_groupby_sum_below_mincount_nullable_integer(): tm.assert_series_equal(result, expected) result = grouped.sum(min_count=2) - expected = pd.DataFrame( - {"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx - ) + expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a1c00eb5f38f5..1c8c7cbaa68c5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -248,7 +248,7 @@ def test_len(): assert len(grouped) == expected # issue 11016 - df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) + df = DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) assert len(df.groupby("a")) == 0 assert len(df.groupby("b")) == 3 assert len(df.groupby(["a", "b"])) == 3 @@ -594,7 +594,7 @@ def test_groupby_multiple_columns(df, op): def test_as_index_select_column(): # GH 5764 - df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) result = df.groupby("A", as_index=False)["B"].get_group(1) expected = Series([2, 4], name="B") tm.assert_series_equal(result, expected) @@ -1186,7 +1186,7 @@ def test_groupby_dtype_inference_empty(): def test_groupby_unit64_float_conversion(): #  GH: 30859 groupby converts unit64 to floats sometimes - df = pd.DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]}) + df = DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]}) result = df.groupby(["first", "second"])["value"].max() expected = Series( [16148277970000000000], @@ -1217,7 +1217,7 @@ def test_groupby_keys_same_size_as_index(): index = pd.date_range( start=pd.Timestamp("2015-09-29T11:34:44-0700"), periods=2, freq=freq ) - df = pd.DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index) + df = DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index) result = df.groupby([pd.Grouper(level=0, freq=freq), "metric"]).mean() expected = df.set_index([df.index, "metric"]) @@ -1227,17 +1227,17 @@ def test_groupby_keys_same_size_as_index(): def test_groupby_one_row(): # GH 11741 msg = r"^'Z'$" - df1 = pd.DataFrame(np.random.randn(1, 4), columns=list("ABCD")) + df1 = DataFrame(np.random.randn(1, 4), columns=list("ABCD")) with pytest.raises(KeyError, match=msg): df1.groupby("Z") - df2 = pd.DataFrame(np.random.randn(2, 4), columns=list("ABCD")) + df2 = DataFrame(np.random.randn(2, 4), columns=list("ABCD")) with pytest.raises(KeyError, match=msg): df2.groupby("Z") def test_groupby_nat_exclude(): # GH 6992 - df = pd.DataFrame( + df = DataFrame( { "values": np.random.randn(8), "dt": [ @@ -1454,7 +1454,7 @@ def foo(x): def test_group_name_available_in_inference_pass(): # gh-15062 - df = pd.DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": np.arange(6)}) + df = DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": np.arange(6)}) names = [] @@ -1733,7 +1733,7 @@ def test_group_shift_lose_timezone(): def test_pivot_table_values_key_error(): # This test is designed to replicate the error in issue #14938 - df = pd.DataFrame( + df = DataFrame( { "eventDate": pd.date_range(datetime.today(), periods=20, freq="M").tolist(), "thename": range(0, 20), @@ -1762,7 +1762,7 @@ def test_empty_dataframe_groupby(): def test_tuple_as_grouping(): # https://github.com/pandas-dev/pandas/issues/18314 - df = pd.DataFrame( + df = DataFrame( { ("a", "b"): [1, 1, 1, 1], "a": [2, 2, 2, 2], @@ -1781,7 +1781,7 @@ def test_tuple_as_grouping(): def test_tuple_correct_keyerror(): # https://github.com/pandas-dev/pandas/issues/18798 - df = pd.DataFrame( + df = DataFrame( 1, index=range(3), columns=pd.MultiIndex.from_product([[1, 2], [3, 4]]) ) with pytest.raises(KeyError, match=r"^\(7, 8\)$"): @@ -1790,13 +1790,13 @@ def test_tuple_correct_keyerror(): def test_groupby_agg_ohlc_non_first(): # GH 21716 - df = pd.DataFrame( + df = DataFrame( [[1], [1]], columns=["foo"], index=pd.date_range("2018-01-01", periods=2, freq="D"), ) - expected = pd.DataFrame( + expected = DataFrame( [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]], columns=pd.MultiIndex.from_tuples( ( @@ -1860,7 +1860,7 @@ def test_groupby_groups_in_BaseGrouper(): # GH 26326 # Test if DataFrame grouped with a pandas.Grouper has correct groups mi = pd.MultiIndex.from_product([["A", "B"], ["C", "D"]], names=["alpha", "beta"]) - df = pd.DataFrame({"foo": [1, 2, 1, 2], "bar": [1, 2, 3, 4]}, index=mi) + df = DataFrame({"foo": [1, 2, 1, 2], "bar": [1, 2, 3, 4]}, index=mi) result = df.groupby([pd.Grouper(level="alpha"), "beta"]) expected = df.groupby(["alpha", "beta"]) assert result.groups == expected.groups @@ -1873,7 +1873,7 @@ def test_groupby_groups_in_BaseGrouper(): @pytest.mark.parametrize("group_name", ["x", ["x"]]) def test_groupby_axis_1(group_name): # GH 27614 - df = pd.DataFrame( + df = DataFrame( np.arange(12).reshape(3, 4), index=[0, 1, 0], columns=[10, 20, 10, 20] ) df.index.name = "y" @@ -1886,7 +1886,7 @@ def test_groupby_axis_1(group_name): # test on MI column iterables = [["bar", "baz", "foo"], ["one", "two"]] mi = pd.MultiIndex.from_product(iterables=iterables, names=["x", "x1"]) - df = pd.DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) + df = DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) results = df.groupby(group_name, axis=1).sum() expected = df.T.groupby(group_name).sum().T tm.assert_frame_equal(results, expected) @@ -1961,7 +1961,7 @@ def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): def test_groupby_only_none_group(): # see GH21624 # this was crashing with "ValueError: Length of passed values is 1, index implies 0" - df = pd.DataFrame({"g": [None], "x": 1}) + df = DataFrame({"g": [None], "x": 1}) actual = df.groupby("g")["x"].transform("sum") expected = Series([np.nan], name="x") @@ -1981,7 +1981,7 @@ def test_groupby_duplicate_index(): @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) def test_bool_aggs_dup_column_labels(bool_agg_func): # 21668 - df = pd.DataFrame([[True, True]], columns=["a", "a"]) + df = DataFrame([[True, True]], columns=["a", "a"]) grp_by = df.groupby([0]) result = getattr(grp_by, bool_agg_func)() @@ -1997,7 +1997,7 @@ def test_dup_labels_output_shape(groupby_func, idx): if groupby_func in {"size", "ngroup", "cumcount"}: pytest.skip("Not applicable") - df = pd.DataFrame([[1, 1]], columns=idx) + df = DataFrame([[1, 1]], columns=idx) grp_by = df.groupby([0]) args = [] @@ -2017,7 +2017,7 @@ def test_dup_labels_output_shape(groupby_func, idx): def test_groupby_crash_on_nunique(axis): # Fix following 30253 - df = pd.DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) + df = DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) axis_number = df._get_axis_number(axis) if not axis_number: @@ -2025,7 +2025,7 @@ def test_groupby_crash_on_nunique(axis): result = df.groupby(axis=axis_number, level=0).nunique() - expected = pd.DataFrame({"A": [1, 2], "D": [1, 1]}) + expected = DataFrame({"A": [1, 2], "D": [1, 1]}) if not axis_number: expected = expected.T @@ -2034,7 +2034,7 @@ def test_groupby_crash_on_nunique(axis): def test_groupby_list_level(): # GH 9790 - expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3)) + expected = DataFrame(np.arange(0, 9).reshape(3, 3)) result = expected.groupby(level=[0]).mean() tm.assert_frame_equal(result, expected) @@ -2048,7 +2048,7 @@ def test_groupby_list_level(): ) def test_groups_repr_truncates(max_seq_items, expected): # GH 1135 - df = pd.DataFrame(np.random.randn(5, 1)) + df = DataFrame(np.random.randn(5, 1)) df["a"] = df.index with pd.option_context("display.max_seq_items", max_seq_items): @@ -2061,7 +2061,7 @@ def test_groups_repr_truncates(max_seq_items, expected): def test_group_on_two_row_multiindex_returns_one_tuple_key(): # GH 18451 - df = pd.DataFrame([{"a": 1, "b": 2, "c": 99}, {"a": 1, "b": 2, "c": 88}]) + df = DataFrame([{"a": 1, "b": 2, "c": 99}, {"a": 1, "b": 2, "c": 88}]) df = df.set_index(["a", "b"]) grp = df.groupby(["a", "b"]) @@ -2106,7 +2106,7 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): ) def test_subsetting_columns_keeps_attrs(klass, attr, value): # GH 9959 - When subsetting columns, don't drop attributes - df = pd.DataFrame({"a": [1], "b": [2], "c": [3]}) + df = DataFrame({"a": [1], "b": [2], "c": [3]}) if attr != "axis": df = df.set_index("a") @@ -2119,7 +2119,7 @@ def test_subsetting_columns_keeps_attrs(klass, attr, value): def test_groupby_column_index_name_lost(func): # GH: 29764 groupby loses index sometimes expected = pd.Index(["a"], name="idx") - df = pd.DataFrame([[1]], columns=expected) + df = DataFrame([[1]], columns=expected) df_grouped = df.groupby([1]) result = getattr(df_grouped, func)().columns tm.assert_index_equal(result, expected) @@ -2127,10 +2127,10 @@ def test_groupby_column_index_name_lost(func): def test_groupby_duplicate_columns(): # GH: 31735 - df = pd.DataFrame( + df = DataFrame( {"A": ["f", "e", "g", "h"], "B": ["a", "b", "c", "d"], "C": [1, 2, 3, 4]} ).astype(object) df.columns = ["A", "B", "B"] result = df.groupby([0, 0, 0, 0]).min() - expected = pd.DataFrame([["e", "a", 1]], columns=["A", "B", "B"]) + expected = DataFrame([["e", "a", 1]], columns=["A", "B", "B"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 3b3967b858adf..48859db305e46 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -158,7 +158,7 @@ def test_grouper_multilevel_freq(self): d0 = date.today() - timedelta(days=14) dates = date_range(d0, date.today()) date_index = pd.MultiIndex.from_product([dates, dates], names=["foo", "bar"]) - df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index) + df = DataFrame(np.random.randint(0, 100, 225), index=date_index) # Check string level expected = ( @@ -258,7 +258,7 @@ def test_grouper_column_and_index(self): [("a", 1), ("a", 2), ("a", 3), ("b", 1), ("b", 2), ("b", 3)] ) idx.names = ["outer", "inner"] - df_multi = pd.DataFrame( + df_multi = DataFrame( {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]}, index=idx, ) @@ -289,7 +289,7 @@ def test_groupby_levels_and_columns(self): idx = pd.MultiIndex.from_tuples( [(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names ) - df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx) + df = DataFrame(np.arange(12).reshape(-1, 3), index=idx) by_levels = df.groupby(level=idx_names).mean() # reset_index changes columns dtype to object @@ -407,7 +407,7 @@ def test_multiindex_passthru(self): # GH 7997 # regression from 0.14.1 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) result = df.groupby(axis=1, level=[0, 1]).first() @@ -463,7 +463,7 @@ def test_multiindex_columns_empty_level(self): def test_groupby_multiindex_tuple(self): # GH 17979 - df = pd.DataFrame( + df = DataFrame( [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], columns=pd.MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), ) @@ -471,7 +471,7 @@ def test_groupby_multiindex_tuple(self): result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) - df2 = pd.DataFrame( + df2 = DataFrame( df.values, columns=pd.MultiIndex.from_arrays( [["a", "b", "b", "c"], ["d", "d", "e", "e"]] @@ -481,7 +481,7 @@ def test_groupby_multiindex_tuple(self): result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) - df3 = pd.DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) + df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) expected = df3.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) @@ -596,7 +596,7 @@ def test_grouping_labels(self, mframe): def test_list_grouper_with_nat(self): # GH 14715 - df = pd.DataFrame({"date": pd.date_range("1/1/2011", periods=365, freq="D")}) + df = DataFrame({"date": pd.date_range("1/1/2011", periods=365, freq="D")}) df.iloc[-1] = pd.NaT grouper = pd.Grouper(key="date", freq="AS") @@ -632,7 +632,7 @@ def test_evaluate_with_empty_groups(self, func, expected): # test transform'ing empty groups # (not testing other agg fns, because they return # different index objects. - df = pd.DataFrame({1: [], 2: []}) + df = DataFrame({1: [], 2: []}) g = df.groupby(1) result = getattr(g[2], func)(lambda x: x) tm.assert_series_equal(result, expected) @@ -680,13 +680,13 @@ def test_groupby_level_index_value_all_na(self): def test_groupby_multiindex_level_empty(self): # https://github.com/pandas-dev/pandas/issues/31670 - df = pd.DataFrame( + df = DataFrame( [[123, "a", 1.0], [123, "b", 2.0]], columns=["id", "category", "value"] ) df = df.set_index(["id", "category"]) empty = df[df.value < 0] result = empty.groupby("id").sum() - expected = pd.DataFrame( + expected = DataFrame( dtype="float64", columns=["value"], index=pd.Int64Index([], name="id") ) tm.assert_frame_equal(result, expected) @@ -746,7 +746,7 @@ def test_get_group(self): def test_get_group_empty_bins(self, observed): - d = pd.DataFrame([3, 1, 7, 6]) + d = DataFrame([3, 1, 7, 6]) bins = [0, 5, 10, 15] g = d.groupby(pd.cut(d[0], bins), observed=observed) @@ -784,10 +784,10 @@ def test_groupby_with_empty(self): assert next(iter(grouped), None) is None def test_groupby_with_single_column(self): - df = pd.DataFrame({"a": list("abssbab")}) + df = DataFrame({"a": list("abssbab")}) tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) # GH 13530 - exp = pd.DataFrame(index=pd.Index(["a", "b", "s"], name="a")) + exp = DataFrame(index=pd.Index(["a", "b", "s"], name="a")) tm.assert_frame_equal(df.groupby("a").count(), exp) tm.assert_frame_equal(df.groupby("a").sum(), exp) tm.assert_frame_equal(df.groupby("a").nth(1), exp) @@ -796,7 +796,7 @@ def test_gb_key_len_equal_axis_len(self): # GH16843 # test ensures that index and column keys are recognized correctly # when number of keys equals axis length of groupby - df = pd.DataFrame( + df = DataFrame( [["foo", "bar", "B", 1], ["foo", "bar", "B", 2], ["foo", "baz", "C", 3]], columns=["first", "second", "third", "one"], ) @@ -905,7 +905,7 @@ def test_dictify(self, df): def test_groupby_with_small_elem(self): # GH 8542 # length=2 - df = pd.DataFrame( + df = DataFrame( {"event": ["start", "start"], "change": [1234, 5678]}, index=pd.DatetimeIndex(["2014-09-10", "2013-10-10"]), ) @@ -920,7 +920,7 @@ def test_groupby_with_small_elem(self): res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) - df = pd.DataFrame( + df = DataFrame( {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-09-15"]), ) @@ -936,7 +936,7 @@ def test_groupby_with_small_elem(self): tm.assert_frame_equal(res, df.iloc[[1], :]) # length=3 - df = pd.DataFrame( + df = DataFrame( {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-08-05"]), ) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 7dd37163021ed..fe35f6f5d9416 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -57,9 +57,7 @@ def test_first_last_nth(df): @pytest.mark.parametrize("method", ["first", "last"]) def test_first_last_with_na_object(method, nulls_fixture): # https://github.com/pandas-dev/pandas/issues/32123 - groups = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby( - "a" - ) + groups = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby("a") result = getattr(groups, method)() if method == "first": @@ -69,7 +67,7 @@ def test_first_last_with_na_object(method, nulls_fixture): values = np.array(values, dtype=result["b"].dtype) idx = pd.Index([1, 2], name="a") - expected = pd.DataFrame({"b": values}, index=idx) + expected = DataFrame({"b": values}, index=idx) tm.assert_frame_equal(result, expected) @@ -77,9 +75,7 @@ def test_first_last_with_na_object(method, nulls_fixture): @pytest.mark.parametrize("index", [0, -1]) def test_nth_with_na_object(index, nulls_fixture): # https://github.com/pandas-dev/pandas/issues/32123 - groups = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby( - "a" - ) + groups = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby("a") result = groups.nth(index) if index == 0: @@ -89,7 +85,7 @@ def test_nth_with_na_object(index, nulls_fixture): values = np.array(values, dtype=result["b"].dtype) idx = pd.Index([1, 2], name="a") - expected = pd.DataFrame({"b": values}, index=idx) + expected = DataFrame({"b": values}, index=idx) tm.assert_frame_equal(result, expected) @@ -142,7 +138,7 @@ def test_first_last_nth_dtypes(df_mixed_floats): def test_first_last_nth_nan_dtype(): # GH 33591 - df = pd.DataFrame({"data": ["A"], "nans": Series([np.nan], dtype=object)}) + df = DataFrame({"data": ["A"], "nans": Series([np.nan], dtype=object)}) grouped = df.groupby("data") expected = df.set_index("data").nans @@ -154,7 +150,7 @@ def test_first_last_nth_nan_dtype(): def test_first_strings_timestamps(): # GH 11244 - test = pd.DataFrame( + test = DataFrame( { pd.Timestamp("2012-01-01 00:00:00"): ["a", "b"], pd.Timestamp("2012-01-02 00:00:00"): ["c", "d"], @@ -387,7 +383,7 @@ def test_first_last_tz(data, expected_first, expected_last): def test_first_last_tz_multi_column(method, ts, alpha): # GH 21603 category_string = Series(list("abc")).astype("category") - df = pd.DataFrame( + df = DataFrame( { "group": [1, 1, 2], "category_string": category_string, @@ -395,7 +391,7 @@ def test_first_last_tz_multi_column(method, ts, alpha): } ) result = getattr(df.groupby("group"), method)() - expected = pd.DataFrame( + expected = DataFrame( { "category_string": pd.Categorical( [alpha, "c"], dtype=category_string.dtype @@ -614,7 +610,7 @@ def test_nth_nan_in_grouper(dropna): columns=list("abc"), ) result = df.groupby("a").nth(0, dropna=dropna) - expected = pd.DataFrame( + expected = DataFrame( [[2, 3], [6, 7]], columns=list("bc"), index=Index(["abc", "def"], name="a") ) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 8e37ac1a1a21d..7edb358170b50 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -83,7 +83,7 @@ def test_nunique(): def test_nunique_with_object(): # GH 11077 - data = pd.DataFrame( + data = DataFrame( [ [100, 1, "Alice"], [200, 2, "Bob"], @@ -110,7 +110,7 @@ def test_nunique_with_empty_series(): def test_nunique_with_timegrouper(): # GH 13453 - test = pd.DataFrame( + test = DataFrame( { "time": [ Timestamp("2016-06-28 09:35:35"), @@ -156,22 +156,22 @@ def test_nunique_with_timegrouper(): ) def test_nunique_with_NaT(key, data, dropna, expected): # GH 27951 - df = pd.DataFrame({"key": key, "data": data}) + df = DataFrame({"key": key, "data": data}) result = df.groupby(["key"])["data"].nunique(dropna=dropna) tm.assert_series_equal(result, expected) def test_nunique_preserves_column_level_names(): # GH 23222 - test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) + test = DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) result = test.groupby([0, 0, 0]).nunique() - expected = pd.DataFrame([2], columns=test.columns) + expected = DataFrame([2], columns=test.columns) tm.assert_frame_equal(result, expected) def test_nunique_transform_with_datetime(): # GH 35109 - transform with nunique on datetimes results in integers - df = pd.DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) + df = DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) result = df.groupby([0, 0, 1])["date"].transform("nunique") expected = Series([2, 2, 1], name="date") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index d2ab016f608fa..6812ac6ce8f34 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -42,7 +42,7 @@ def test_pipe_args(): # Test passing args to the pipe method of DataFrameGroupBy. # Issue #17871 - df = pd.DataFrame( + df = DataFrame( { "group": ["A", "A", "B", "B", "C"], "x": [1.0, 2.0, 3.0, 2.0, 5.0], diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 9338742195bfe..14b0d9ab60e52 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -54,18 +54,18 @@ def test_quantile(interpolation, a_vals, b_vals, q): def test_quantile_array(): # https://github.com/pandas-dev/pandas/issues/27526 - df = pd.DataFrame({"A": [0, 1, 2, 3, 4]}) + df = DataFrame({"A": [0, 1, 2, 3, 4]}) result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25]) index = pd.MultiIndex.from_product([[0, 1], [0.25]]) - expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index) + expected = DataFrame({"A": [0.25, 2.50]}, index=index) tm.assert_frame_equal(result, expected) - df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) + df = DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]]) result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75]) - expected = pd.DataFrame( + expected = DataFrame( {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index ) tm.assert_frame_equal(result, expected) @@ -73,11 +73,11 @@ def test_quantile_array(): def test_quantile_array2(): # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 - df = pd.DataFrame( + df = DataFrame( np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC") ) result = df.groupby("A").quantile([0.3, 0.7]) - expected = pd.DataFrame( + expected = DataFrame( { "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0], "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0], @@ -90,16 +90,16 @@ def test_quantile_array2(): def test_quantile_array_no_sort(): - df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) + df = DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) - expected = pd.DataFrame( + expected = DataFrame( {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]}, index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]), ) tm.assert_frame_equal(result, expected) result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25]) - expected = pd.DataFrame( + expected = DataFrame( {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]}, index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]), ) @@ -107,7 +107,7 @@ def test_quantile_array_no_sort(): def test_quantile_array_multiple_levels(): - df = pd.DataFrame( + df = DataFrame( {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]} ) result = df.groupby(["c", "d"]).quantile([0.25, 0.75]) @@ -115,7 +115,7 @@ def test_quantile_array_multiple_levels(): [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)], names=["c", "d", None], ) - expected = pd.DataFrame( + expected = DataFrame( {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index ) tm.assert_frame_equal(result, expected) @@ -127,9 +127,7 @@ def test_quantile_array_multiple_levels(): def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q): # GH30289 nrow, ncol = frame_size - df = pd.DataFrame( - np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol) - ) + df = DataFrame(np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)) idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q] idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [ @@ -142,7 +140,7 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q ] expected_columns = [x for x in range(ncol) if x not in groupby] - expected = pd.DataFrame( + expected = DataFrame( expected_values, index=expected_index, columns=expected_columns ) result = df.groupby(groupby).quantile(q) @@ -151,9 +149,7 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, def test_quantile_raises(): - df = pd.DataFrame( - [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"] - ) + df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"): df.groupby("key").quantile() @@ -161,7 +157,7 @@ def test_quantile_raises(): def test_quantile_out_of_bounds_q_raises(): # https://github.com/pandas-dev/pandas/issues/27470 - df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) + df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) g = df.groupby([0, 0, 0, 1, 1, 1]) with pytest.raises(ValueError, match="Got '50.0' instead"): g.quantile(50) @@ -173,7 +169,7 @@ def test_quantile_out_of_bounds_q_raises(): def test_quantile_missing_group_values_no_segfaults(): # GH 28662 data = np.array([1.0, np.nan, 1.0]) - df = pd.DataFrame(dict(key=data, val=range(3))) + df = DataFrame(dict(key=data, val=range(3))) # Random segfaults; would have been guaranteed in loop grp = df.groupby("key") @@ -195,9 +191,9 @@ def test_quantile_missing_group_values_correct_results( key, val, expected_key, expected_val ): # GH 28662, GH 33200, GH 33569 - df = pd.DataFrame({"key": key, "val": val}) + df = DataFrame({"key": key, "val": val}) - expected = pd.DataFrame( + expected = DataFrame( expected_val, index=pd.Index(expected_key, name="key"), columns=["val"] ) @@ -220,7 +216,7 @@ def test_quantile_missing_group_values_correct_results( @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) def test_groupby_quantile_nullable_array(values, q): # https://github.com/pandas-dev/pandas/issues/33136 - df = pd.DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values}) + df = DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values}) result = df.groupby("a")["b"].quantile(q) if isinstance(q, list): @@ -236,7 +232,7 @@ def test_groupby_quantile_nullable_array(values, q): @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) def test_groupby_quantile_skips_invalid_dtype(q): - df = pd.DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) + df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) result = df.groupby("a").quantile(q) expected = df.groupby("a")[["b"]].quantile(q) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 4693fe360c819..0a1232d3f24da 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -72,7 +72,7 @@ def test_groupby_with_timegrouper_methods(self, should_sort): # GH 3881 # make sure API of timegrouper conforms - df = pd.DataFrame( + df = DataFrame( { "Branch": "A A A A A B".split(), "Buyer": "Carl Mark Carl Joe Joe Carl".split(), @@ -403,7 +403,7 @@ def test_timegrouper_apply_return_type_series(self): # Using `apply` with the `TimeGrouper` should give the # same return type as an `apply` with a `Grouper`. # Issue #11742 - df = pd.DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) df_dt = df.copy() df_dt["date"] = pd.to_datetime(df_dt["date"]) @@ -420,7 +420,7 @@ def test_timegrouper_apply_return_type_value(self): # Using `apply` with the `TimeGrouper` should give the # same return type as an `apply` with a `Grouper`. # Issue #11742 - df = pd.DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) df_dt = df.copy() df_dt["date"] = pd.to_datetime(df_dt["date"]) @@ -448,7 +448,7 @@ def test_groupby_groups_datetimeindex(self): # GH#11442 index = pd.date_range("2015/01/01", periods=5, name="date") - df = pd.DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index) + df = DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index) result = df.groupby(level="date").groups dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] expected = { @@ -461,7 +461,7 @@ def test_groupby_groups_datetimeindex(self): result = grouped.get_group(date) data = [[df.loc[date, "A"], df.loc[date, "B"]]] expected_index = pd.DatetimeIndex([date], name="date", freq="D") - expected = pd.DataFrame(data, columns=list("AB"), index=expected_index) + expected = DataFrame(data, columns=list("AB"), index=expected_index) tm.assert_frame_equal(result, expected) def test_groupby_groups_datetimeindex_tz(self): @@ -671,7 +671,7 @@ def test_groupby_with_timezone_selection(self): # GH 11616 # Test that column selection returns output in correct timezone. np.random.seed(42) - df = pd.DataFrame( + df = DataFrame( { "factor": np.random.randint(0, 3, size=60), "time": pd.date_range( @@ -687,9 +687,9 @@ def test_timezone_info(self): # see gh-11682: Timezone info lost when broadcasting # scalar datetime to DataFrame - df = pd.DataFrame({"a": [1], "b": [datetime.now(pytz.utc)]}) + df = DataFrame({"a": [1], "b": [datetime.now(pytz.utc)]}) assert df["b"][0].tzinfo == pytz.utc - df = pd.DataFrame({"a": [1, 2, 3]}) + df = DataFrame({"a": [1, 2, 3]}) df["b"] = datetime.now(pytz.utc) assert df["b"][0].tzinfo == pytz.utc @@ -733,7 +733,7 @@ def test_first_last_max_min_on_time_data(self): def test_nunique_with_timegrouper_and_nat(self): # GH 17575 - test = pd.DataFrame( + test = DataFrame( { "time": [ Timestamp("2016-06-28 09:35:35"), @@ -760,7 +760,7 @@ def test_scalar_call_versus_list_call(self): ), "value": [1, 2, 3], } - data_frame = pd.DataFrame(data_frame).set_index("time") + data_frame = DataFrame(data_frame).set_index("time") grouper = pd.Grouper(freq="D") grouped = data_frame.groupby(grouper) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 4b79701a57acd..946e60d17e0bb 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -96,7 +96,7 @@ def test_transform_fast(): tm.assert_series_equal(result, expected) # GH 12737 - df = pd.DataFrame( + df = DataFrame( { "grouping": [0, 1, 1, 3], "f": [1.1, 2.1, 3.1, 4.5], @@ -113,7 +113,7 @@ def test_transform_fast(): pd.Timestamp("2014-1-2"), pd.Timestamp("2014-1-4"), ] - expected = pd.DataFrame( + expected = DataFrame( {"f": [1.1, 2.1, 2.1, 4.5], "d": dates, "i": [1, 2, 2, 4]}, columns=["f", "i", "d"], ) @@ -125,7 +125,7 @@ def test_transform_fast(): tm.assert_frame_equal(result, expected) # dup columns - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) result = df.groupby("g").transform("first") expected = df.drop("g", axis=1) tm.assert_frame_equal(result, expected) @@ -223,11 +223,11 @@ def test_transform_numeric_to_boolean(): # inconsistency in transforming boolean values expected = Series([True, True], name="A") - df = pd.DataFrame({"A": [1.1, 2.2], "B": [1, 2]}) + df = DataFrame({"A": [1.1, 2.2], "B": [1, 2]}) result = df.groupby("B").A.transform(lambda x: True) tm.assert_series_equal(result, expected) - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + df = DataFrame({"A": [1, 2], "B": [1, 2]}) result = df.groupby("B").A.transform(lambda x: True) tm.assert_series_equal(result, expected) @@ -389,7 +389,7 @@ def test_transform_function_aliases(df): def test_series_fast_transform_date(): # GH 13191 - df = pd.DataFrame( + df = DataFrame( {"grouping": [np.nan, 1, 1, 3], "d": pd.date_range("2014-1-1", "2014-1-4")} ) result = df.groupby("grouping")["d"].transform("first") @@ -405,7 +405,7 @@ def test_series_fast_transform_date(): def test_transform_length(): # GH 9697 - df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": [1, 2, 3, np.nan]}) + df = DataFrame({"col1": [1, 1, 2, 2], "col2": [1, 2, 3, np.nan]}) expected = Series([3.0] * 4) def nsum(x): @@ -426,7 +426,7 @@ def test_transform_coercion(): # 14457 # when we are transforming be sure to not coerce # via assignment - df = pd.DataFrame(dict(A=["a", "a"], B=[0, 1])) + df = DataFrame(dict(A=["a", "a"], B=[0, 1])) g = df.groupby("A") expected = g.transform(np.mean) @@ -482,7 +482,7 @@ def test_groupby_transform_with_int(): def test_groupby_transform_with_nan_group(): # GH 9941 - df = pd.DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) + df = DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) result = df.groupby(df.b)["a"].transform(max) expected = Series([1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a") tm.assert_series_equal(result, expected) @@ -663,7 +663,7 @@ def test_cython_transform_series(op, args, targop): ], ) def test_groupby_cum_skipna(op, skipna, input, exp): - df = pd.DataFrame(input) + df = DataFrame(input) result = df.groupby("key")["value"].transform(op, skipna=skipna) if isinstance(exp, dict): expected = exp[(op, skipna)] @@ -778,7 +778,7 @@ def test_transform_with_non_scalar_group(): ("non", "G"), ] ) - df = pd.DataFrame( + df = DataFrame( np.random.randint(1, 10, (4, 12)), columns=cols, index=["A", "C", "G", "T"] ) @@ -793,7 +793,7 @@ def test_transform_with_non_scalar_group(): ("a", Series([1, 1, 1], name="a"), tm.assert_series_equal), ( ["a", "c"], - pd.DataFrame({"a": [1, 1, 1], "c": [1, 1, 1]}), + DataFrame({"a": [1, 1, 1], "c": [1, 1, 1]}), tm.assert_frame_equal, ), ], @@ -807,7 +807,7 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request): request.node.add_marker(pytest.mark.xfail(reason=reason)) # GH 19200 - df = pd.DataFrame( + df = DataFrame( {"a": pd.date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} ) @@ -890,7 +890,7 @@ def test_pad_stable_sorting(fill_method): if fill_method == "bfill": y = y[::-1] - df = pd.DataFrame({"x": x, "y": y}) + df = DataFrame({"x": x, "y": y}) expected = df.drop("x", 1) result = getattr(df.groupby("x"), fill_method)() @@ -978,7 +978,7 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status): @pytest.mark.parametrize("func", [np.any, np.all]) def test_any_all_np_func(func): # GH 20653 - df = pd.DataFrame( + df = DataFrame( [["foo", True], [np.nan, True], ["foo", True]], columns=["key", "val"] ) @@ -1000,8 +1000,8 @@ def demean_rename(x): return result - df = pd.DataFrame({"group": list("ababa"), "value": [1, 1, 1, 2, 2]}) - expected = pd.DataFrame({"value": [-1.0 / 3, -0.5, -1.0 / 3, 0.5, 2.0 / 3]}) + df = DataFrame({"group": list("ababa"), "value": [1, 1, 1, 2, 2]}) + expected = DataFrame({"value": [-1.0 / 3, -0.5, -1.0 / 3, 0.5, 2.0 / 3]}) result = df.groupby("group").transform(demean_rename) tm.assert_frame_equal(result, expected) @@ -1013,9 +1013,9 @@ def demean_rename(x): def test_groupby_transform_timezone_column(func): # GH 24198 ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore") - result = pd.DataFrame({"end_time": [ts], "id": [1]}) + result = DataFrame({"end_time": [ts], "id": [1]}) result["max_end_time"] = result.groupby("id").end_time.transform(func) - expected = pd.DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) + expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) tm.assert_frame_equal(result, expected) @@ -1030,7 +1030,7 @@ def test_groupby_transform_with_datetimes(func, values): # GH 15306 dates = pd.date_range("1/1/2011", periods=10, freq="D") - stocks = pd.DataFrame({"price": np.arange(10.0)}, index=dates) + stocks = DataFrame({"price": np.arange(10.0)}, index=dates) stocks["week_id"] = dates.isocalendar().week result = stocks.groupby(stocks["week_id"])["price"].transform(func) @@ -1057,7 +1057,7 @@ def test_transform_absent_categories(func): @pytest.mark.parametrize("key, val", [("level", 0), ("by", Series([0]))]) def test_ffill_not_in_axis(func, key, val): # GH 21521 - df = pd.DataFrame([[np.nan]]) + df = DataFrame([[np.nan]]) result = getattr(df.groupby(**{key: val}), func)() expected = df @@ -1143,7 +1143,7 @@ def test_transform_fastpath_raises(): # GH#29631 case where fastpath defined in groupby.generic _choose_path # raises, but slow_path does not - df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]}) + df = DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]}) gb = df.groupby("A") def func(grp): @@ -1165,13 +1165,13 @@ def func(grp): result = gb.transform(func) - expected = pd.DataFrame([2, -2, 2, 4], columns=["B"]) + expected = DataFrame([2, -2, 2, 4], columns=["B"]) tm.assert_frame_equal(result, expected) def test_transform_lambda_indexing(): # GH 7883 - df = pd.DataFrame( + df = DataFrame( { "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"], "B": ["one", "one", "two", "three", "two", "six", "five", "three"], @@ -1211,14 +1211,14 @@ def test_categorical_and_not_categorical_key(observed): # and a non-categorical key, doesn't try to expand the output to include # non-observed categories but instead matches the input shape. # GH 32494 - df_with_categorical = pd.DataFrame( + df_with_categorical = DataFrame( { "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), "B": [1, 2, 3], "C": ["a", "b", "a"], } ) - df_without_categorical = pd.DataFrame( + df_without_categorical = DataFrame( {"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]} ) @@ -1226,7 +1226,7 @@ def test_categorical_and_not_categorical_key(observed): result = df_with_categorical.groupby(["A", "C"], observed=observed).transform("sum") expected = df_without_categorical.groupby(["A", "C"]).transform("sum") tm.assert_frame_equal(result, expected) - expected_explicit = pd.DataFrame({"B": [4, 2, 4]}) + expected_explicit = DataFrame({"B": [4, 2, 4]}) tm.assert_frame_equal(result, expected_explicit) # Series case diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 3519c5d0d5a9a..c80548783d148 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -98,7 +98,7 @@ def test_to_frame_dtype_fidelity(): ) original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} - expected_df = pd.DataFrame( + expected_df = DataFrame( { "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), "a": [1, 1, 1, 2, 2, 2], diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index a1e5cc33ef2f6..3d7e6e9c32248 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -98,7 +98,7 @@ def test_unsortedindex(): [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], names=["one", "two"], ) - df = pd.DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) + df = DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) # GH 16734: not sorted, but no real slicing result = df.loc(axis=0)["z", "a"] diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 1f3f59d038ce9..df59d09edd3ef 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -131,9 +131,9 @@ def test_mi_intervalindex_slicing_with_scalar(self): ) idx.names = ["Item", "RID", "MP"] - df = pd.DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]}) + df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]}) df.index = idx - query_df = pd.DataFrame( + query_df = DataFrame( { "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"], "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"], diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 6072400d06a36..03046f51d668a 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -448,7 +448,7 @@ def test_loc_period_string_indexing(): a = pd.period_range("2013Q1", "2013Q4", freq="Q") i = (1111, 2222, 3333) idx = pd.MultiIndex.from_product((a, i), names=("Periode", "CVR")) - df = pd.DataFrame( + df = DataFrame( index=idx, columns=( "OMS", @@ -478,7 +478,7 @@ def test_loc_datetime_mask_slicing(): # GH 16699 dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) m_idx = pd.MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) - df = pd.DataFrame( + df = DataFrame( data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] ) result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"] @@ -554,7 +554,7 @@ def test_3levels_leading_period_index(): class TestKeyErrorsWithMultiIndex: def test_missing_keys_raises_keyerror(self): # GH#27420 KeyError, not TypeError - df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) + df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) df2 = df.set_index(["A", "B"]) with pytest.raises(KeyError, match="1"): diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 4565d79c632de..2e97dec789c5b 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -73,9 +73,9 @@ def test_nested_tuples_duplicates(self): idx = pd.Index(["a", "a", "c"]) mi = pd.MultiIndex.from_arrays([dti, idx], names=["index1", "index2"]) - df = pd.DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi) + df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi) - expected = pd.DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi) + expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi) df2 = df.copy(deep=True) df2.loc[(dti[0], "a"), "c2"] = 1.0 diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 853b92ea91274..b58b81d5aa1b3 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -429,9 +429,9 @@ def test_setitem_nonmonotonic(self): index = pd.MultiIndex.from_tuples( [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"] ) - df = pd.DataFrame(data=[0, 1, 2], index=index, columns=["e"]) + df = DataFrame(data=[0, 1, 2], index=index, columns=["e"]) df.loc["a", "e"] = np.arange(99, 101, dtype="int64") - expected = pd.DataFrame({"e": [99, 1, 100]}, index=index) + expected = DataFrame({"e": [99, 1, 100]}, index=index) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index c1b41c6f5d8cf..024cc3ad72688 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -496,7 +496,7 @@ def test_loc_axis_arguments(self): def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self): # GH29519 - df = pd.DataFrame( + df = DataFrame( np.arange(27).reshape(3, 9), columns=pd.MultiIndex.from_product( [["a1", "a2", "a3"], ["b1", "b2", "b3"]] @@ -510,7 +510,7 @@ def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self): def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self): # GH29519 - df = pd.DataFrame( + df = DataFrame( np.arange(27).reshape(3, 9), columns=pd.MultiIndex.from_product( [["a1", "a2", "a3"], ["b1", "b2", "b3"]] @@ -526,7 +526,7 @@ def test_loc_ax_single_level_indexer_simple_df(self): # GH29519 # test single level indexing on single index column data frame - df = pd.DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"]) result = df.loc(axis=1)["a"] expected = Series(np.array([0, 3, 6]), name="a") tm.assert_series_equal(result, expected) @@ -736,11 +736,11 @@ def test_non_reducing_slice_on_multiindex(self): ("b", "c"): [3, 2], ("b", "d"): [4, 1], } - df = pd.DataFrame(dic, index=[0, 1]) + df = DataFrame(dic, index=[0, 1]) idx = pd.IndexSlice slice_ = idx[:, idx["b", "d"]] tslice_ = non_reducing_slice(slice_) result = df.loc[tslice_] - expected = pd.DataFrame({("b", "d"): [4, 1]}) + expected = DataFrame({("b", "d"): [4, 1]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 1241d394d7936..d162468235767 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -83,7 +83,7 @@ def test_setitem_cache_updating(self): def test_altering_series_clears_parent_cache(self): # GH #33675 - df = pd.DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) ser = df["A"] assert "A" in df._item_cache @@ -350,14 +350,12 @@ def test_detect_chained_assignment_warnings_errors(self): def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self): # xref gh-13017. with option_context("chained_assignment", "warn"): - df = pd.DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"] - ) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]) with tm.assert_produces_warning(com.SettingWithCopyWarning): df.c.loc[df.c > 0] = None - expected = pd.DataFrame( + expected = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"] ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index f5e6aea5f8db8..5e00056c33db7 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -334,9 +334,9 @@ def test_loc_setitem_with_existing_dst(self): end = pd.Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") ts = pd.Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") idx = pd.date_range(start, end, closed="left", freq="H") - result = pd.DataFrame(index=idx, columns=["value"]) + result = DataFrame(index=idx, columns=["value"]) result.loc[ts, "value"] = 12 - expected = pd.DataFrame( + expected = DataFrame( [np.nan] * len(idx) + [12], index=idx.append(pd.DatetimeIndex([ts])), columns=["value"], diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f94f1d6aa453f..31abe45215432 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -195,7 +195,7 @@ def test_iloc_array_not_mutating_negative_indices(self): # GH 21867 array_with_neg_numbers = np.array([1, 2, -1]) array_copy = array_with_neg_numbers.copy() - df = pd.DataFrame( + df = DataFrame( {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]}, index=[1, 2, 3], ) @@ -372,7 +372,7 @@ def test_iloc_setitem_dups(self): def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self): # Same as the "assign back to self" check in test_iloc_setitem_dups # but on a DataFrame with multiple blocks - df = pd.DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) + df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) df.iloc[:, 0] = df.iloc[:, 0].astype("f8") assert len(df._mgr.blocks) == 2 @@ -562,7 +562,7 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value): # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated # elementwisely, not using "setter('A', ['Z'])". - df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) df.iloc[0, indexer] = value result = df.iloc[0, 0] @@ -712,7 +712,7 @@ def test_series_indexing_zerodim_np_array(self): def test_iloc_setitem_categorical_updates_inplace(self): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer cat = pd.Categorical(["A", "B", "C"]) - df = pd.DataFrame({1: cat, 2: [1, 2, 3]}) + df = DataFrame({1: cat, 2: [1, 2, 3]}) # This should modify our original values in-place df.iloc[:, 0] = cat[::-1] @@ -743,8 +743,8 @@ def test_iloc_with_boolean_operation(self): class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): # GH#15686, duplicate columns and mixed dtype - df1 = pd.DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) - df2 = pd.DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) df = pd.concat([df1, df2], axis=1) df.iloc[0, 0] = -1 @@ -754,15 +754,15 @@ def test_iloc_setitem_scalar_duplicate_columns(self): def test_iloc_setitem_list_duplicate_columns(self): # GH#22036 setting with same-sized list - df = pd.DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"]) + df = DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"]) df.iloc[:, 2] = ["str3"] - expected = pd.DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"]) + expected = DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"]) tm.assert_frame_equal(df, expected) def test_iloc_setitem_series_duplicate_columns(self): - df = pd.DataFrame( + df = DataFrame( np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"] ) df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b4ea92fae1136..79834dc36ce7d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -991,7 +991,7 @@ def test_none_coercion_mixed_dtypes(self): def test_extension_array_cross_section(): # A cross-section of a homogeneous EA should be an EA - df = pd.DataFrame( + df = DataFrame( { "A": pd.core.arrays.integer_array([1, 2]), "B": pd.core.arrays.integer_array([3, 4]), @@ -1008,7 +1008,7 @@ def test_extension_array_cross_section(): def test_extension_array_cross_section_converts(): # all numeric columns -> numeric series - df = pd.DataFrame( + df = DataFrame( {"A": pd.array([1, 2], dtype="Int64"), "B": np.array([1, 2])}, index=["a", "b"] ) result = df.loc["a"] @@ -1019,7 +1019,7 @@ def test_extension_array_cross_section_converts(): tm.assert_series_equal(result, expected) # mixed columns -> object series - df = pd.DataFrame( + df = DataFrame( {"A": pd.array([1, 2], dtype="Int64"), "B": np.array(["a", "b"])}, index=["a", "b"], ) @@ -1033,7 +1033,7 @@ def test_extension_array_cross_section_converts(): def test_readonly_indices(): # GH#17192 iloc with read-only array raising TypeError - df = pd.DataFrame({"data": np.ones(100, dtype="float64")}) + df = DataFrame({"data": np.ones(100, dtype="float64")}) indices = np.array([1, 3, 6]) indices.flags.writeable = False @@ -1109,9 +1109,9 @@ def test_long_text_missing_labels_inside_loc_error_message_limited(): def test_setitem_categorical(): # https://github.com/pandas-dev/pandas/issues/35369 - df = pd.DataFrame({"h": Series(list("mn")).astype("category")}) + df = DataFrame({"h": Series(list("mn")).astype("category")}) df.h = df.h.cat.reorder_categories(["n", "m"]) - expected = pd.DataFrame( + expected = DataFrame( {"h": pd.Categorical(["m", "n"]).reorder_categories(["n", "m"])} ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index e7f2ad6e8d735..5c5692b777360 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -632,7 +632,7 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated # elementwisely, not using "setter('A', ['Z'])". - df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) df.loc[0, indexer] = value result = df.loc[0, "A"] @@ -644,7 +644,7 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): ( ([0, 2], ["A", "B", "C", "D"]), 7, - pd.DataFrame( + DataFrame( [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]], columns=["A", "B", "C", "D"], ), @@ -652,7 +652,7 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): ( (1, ["C", "D"]), [7, 8], - pd.DataFrame( + DataFrame( [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]], columns=["A", "B", "C", "D"], ), @@ -660,14 +660,14 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): ( (1, ["A", "B", "C"]), np.array([7, 8, 9], dtype=np.int64), - pd.DataFrame( + DataFrame( [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"] ), ), ( (slice(1, 3, None), ["B", "C", "D"]), [[7, 8, 9], [10, 11, 12]], - pd.DataFrame( + DataFrame( [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]], columns=["A", "B", "C", "D"], ), @@ -675,15 +675,15 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): ( (slice(1, 3, None), ["C", "A", "D"]), np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64), - pd.DataFrame( + DataFrame( [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]], columns=["A", "B", "C", "D"], ), ), ( (slice(None, None, None), ["A", "C"]), - pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), - pd.DataFrame( + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] ), ), @@ -691,7 +691,7 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): ) def test_loc_setitem_missing_columns(self, index, box, expected): # GH 29334 - df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) df.loc[index] = box tm.assert_frame_equal(df, expected) @@ -1010,13 +1010,13 @@ def test_loc_getitem_label_list_integer_labels( def test_loc_setitem_float_intindex(): # GH 8720 rand_data = np.random.randn(8, 4) - result = pd.DataFrame(rand_data) + result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) - expected = pd.DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) + expected = DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) tm.assert_frame_equal(result, expected) - result = pd.DataFrame(rand_data) + result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan tm.assert_frame_equal(result, expected) @@ -1024,13 +1024,13 @@ def test_loc_setitem_float_intindex(): def test_loc_axis_1_slice(): # GH 10586 cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] - df = pd.DataFrame( + df = DataFrame( np.ones((10, 8)), index=tuple("ABCDEFGHIJ"), columns=pd.MultiIndex.from_tuples(cols), ) result = df.loc(axis=1)[(2014, 9):(2015, 8)] - expected = pd.DataFrame( + expected = DataFrame( np.ones((10, 4)), index=tuple("ABCDEFGHIJ"), columns=pd.MultiIndex.from_tuples( @@ -1042,7 +1042,7 @@ def test_loc_axis_1_slice(): def test_loc_set_dataframe_multiindex(): # GH 14592 - expected = pd.DataFrame( + expected = DataFrame( "a", index=range(2), columns=pd.MultiIndex.from_product([range(2), range(2)]) ) result = expected.copy() @@ -1072,7 +1072,7 @@ def test_loc_with_positional_slice_deprecation(): def test_loc_slice_disallows_positional(): # GH#16121, GH#24612, GH#31810 dti = pd.date_range("2016-01-01", periods=3) - df = pd.DataFrame(np.random.random((3, 2)), index=dti) + df = DataFrame(np.random.random((3, 2)), index=dti) ser = df[0] @@ -1100,7 +1100,7 @@ def test_loc_slice_disallows_positional(): def test_loc_datetimelike_mismatched_dtypes(): # GH#32650 dont mix and match datetime/timedelta/period dtypes - df = pd.DataFrame( + df = DataFrame( np.random.randn(5, 3), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), @@ -1122,7 +1122,7 @@ def test_loc_datetimelike_mismatched_dtypes(): def test_loc_with_period_index_indexer(): # GH#4125 idx = pd.period_range("2002-01", "2003-12", freq="M") - df = pd.DataFrame(np.random.randn(24, 10), index=idx) + df = DataFrame(np.random.randn(24, 10), index=idx) tm.assert_frame_equal(df, df.loc[idx]) tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df, df.loc[list(idx)]) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 6005f7800178c..45c2725c26526 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -663,21 +663,21 @@ def test_indexing_timeseries_regression(self): def test_index_name_empty(self): # GH 31368 - df = pd.DataFrame({}, index=pd.RangeIndex(0, name="df_index")) + df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) df["series"] = series - expected = pd.DataFrame( + expected = DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index") ) tm.assert_frame_equal(df, expected) # GH 36527 - df = pd.DataFrame() + df = DataFrame() series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) df["series"] = series - expected = pd.DataFrame( + expected = DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 9a0bfa5c605d9..90f3a392878d9 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1140,7 +1140,7 @@ def test_binop_other(self, op, value, dtype): pytest.skip(f"Invalid combination {op},{dtype}") e = DummyElement(value, dtype) - s = pd.DataFrame({"A": [e.value, e.value]}, dtype=e.dtype) + s = DataFrame({"A": [e.value, e.value]}, dtype=e.dtype) invalid = { (operator.pow, " 43 chars.", "dog"), ] ) - df = pd.DataFrame(1, index=index, columns=columns) + df = DataFrame(1, index=index, columns=columns) result = repr(df) @@ -381,7 +381,7 @@ def test_repr_truncates_terminal_size(self, monkeypatch): assert "dog" in h2 # regular columns - df2 = pd.DataFrame({"A" * 41: [1, 2], "B" * 41: [1, 2]}) + df2 = DataFrame({"A" * 41: [1, 2], "B" * 41: [1, 2]}) result = repr(df2) assert df2.columns[0] in result.split("\n")[0] @@ -389,7 +389,7 @@ def test_repr_truncates_terminal_size(self, monkeypatch): def test_repr_truncates_terminal_size_full(self, monkeypatch): # GH 22984 ensure entire window is filled terminal_size = (80, 24) - df = pd.DataFrame(np.random.rand(1, 7)) + df = DataFrame(np.random.rand(1, 7)) monkeypatch.setattr( "pandas.io.formats.format.get_terminal_size", lambda: terminal_size @@ -399,7 +399,7 @@ def test_repr_truncates_terminal_size_full(self, monkeypatch): def test_repr_truncation_column_size(self): # dataframe with last column very wide -> check it is not used to # determine size of truncation (...) column - df = pd.DataFrame( + df = DataFrame( { "a": [108480, 30830], "b": [12345, 12345], @@ -457,13 +457,13 @@ def mkframe(n): assert has_expanded_repr(df) def test_repr_min_rows(self): - df = pd.DataFrame({"a": range(20)}) + df = DataFrame({"a": range(20)}) # default setting no truncation even if above min_rows assert ".." not in repr(df) assert ".." not in df._repr_html_() - df = pd.DataFrame({"a": range(61)}) + df = DataFrame({"a": range(61)}) # default of max_rows 60 triggers truncation if above assert ".." in repr(df) @@ -493,7 +493,7 @@ def test_repr_min_rows(self): def test_str_max_colwidth(self): # GH 7856 - df = pd.DataFrame( + df = DataFrame( [ { "a": "foo", @@ -689,7 +689,7 @@ def test_east_asian_unicode_false(self): # truncate with option_context("display.max_rows", 3, "display.max_columns", 3): - df = pd.DataFrame( + df = DataFrame( { "a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"], @@ -834,7 +834,7 @@ def test_east_asian_unicode_true(self): # truncate with option_context("display.max_rows", 3, "display.max_columns", 3): - df = pd.DataFrame( + df = DataFrame( { "a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"], @@ -1020,7 +1020,7 @@ def test_datetimelike_frame(self): assert "[6 rows x 1 columns]" in result dts = [pd.Timestamp("2011-01-01", tz="US/Eastern")] * 5 + [pd.NaT] * 5 - df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context("display.max_rows", 5): expected = ( " dt x\n" @@ -1034,7 +1034,7 @@ def test_datetimelike_frame(self): assert repr(df) == expected dts = [pd.NaT] * 5 + [pd.Timestamp("2011-01-01", tz="US/Eastern")] * 5 - df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context("display.max_rows", 5): expected = ( " dt x\n" @@ -1050,7 +1050,7 @@ def test_datetimelike_frame(self): dts = [pd.Timestamp("2011-01-01", tz="Asia/Tokyo")] * 5 + [ pd.Timestamp("2011-01-01", tz="US/Eastern") ] * 5 - df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context("display.max_rows", 5): expected = ( " dt x\n" @@ -2001,14 +2001,14 @@ def test_categorical_columns(self): # GH35439 data = [[4, 2], [3, 2], [4, 3]] cols = ["aaaaaaaaa", "b"] - df = pd.DataFrame(data, columns=cols) - df_cat_cols = pd.DataFrame(data, columns=pd.CategoricalIndex(cols)) + df = DataFrame(data, columns=cols) + df_cat_cols = DataFrame(data, columns=pd.CategoricalIndex(cols)) assert df.to_string() == df_cat_cols.to_string() def test_period(self): # GH 12615 - df = pd.DataFrame( + df = DataFrame( { "A": pd.period_range("2013-01", periods=4, freq="M"), "B": [ @@ -2694,9 +2694,7 @@ def test_to_string_header(self): def test_to_string_multindex_header(self): # GH 16718 - df = pd.DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index( - ["a", "b"] - ) + df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index(["a", "b"]) res = df.to_string(header=["r1", "r2"]) exp = " r1 r2\na b \n0 1 2 3" assert res == exp @@ -2797,7 +2795,7 @@ def test_output_significant_digits(self): # In case default display precision changes: with pd.option_context("display.precision", 6): # DataFrame example from issue #9764 - d = pd.DataFrame( + d = DataFrame( { "col1": [ 9.999e-8, @@ -2869,11 +2867,11 @@ def test_too_long(self): with pd.option_context("display.precision", 4): # need both a number > 1e6 and something that normally formats to # having length > display.precision + 6 - df = pd.DataFrame(dict(x=[12345.6789])) + df = DataFrame(dict(x=[12345.6789])) assert str(df) == " x\n0 12345.6789" - df = pd.DataFrame(dict(x=[2e6])) + df = DataFrame(dict(x=[2e6])) assert str(df) == " x\n0 2000000.0" - df = pd.DataFrame(dict(x=[12345.6789, 2e6])) + df = DataFrame(dict(x=[12345.6789, 2e6])) assert str(df) == " x\n0 1.2346e+04\n1 2.0000e+06" @@ -3205,8 +3203,8 @@ def test_format_percentiles_integer_idx(): def test_repr_html_ipython_config(ip): code = textwrap.dedent( """\ - import pandas as pd - df = pd.DataFrame({"A": [1, 2]}) + from pandas import DataFrame + df = DataFrame({"A": [1, 2]}) df._repr_html_() cfg = get_ipython().config diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 476d75f7d239d..79f9bbace000e 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -28,10 +28,10 @@ def h(x, foo="bar"): self.h = h self.styler = Styler(self.df) - self.attrs = pd.DataFrame({"A": ["color: red", "color: blue"]}) + self.attrs = DataFrame({"A": ["color: red", "color: blue"]}) self.dataframes = [ self.df, - pd.DataFrame( + DataFrame( {"f": [1.0, 2.0], "o": ["a", "b"], "c": pd.Categorical(["a", "b"])} ), ] @@ -110,7 +110,7 @@ def test_clear(self): assert len(s._todo) == 0 def test_render(self): - df = pd.DataFrame({"A": [0, 1]}) + df = DataFrame({"A": [0, 1]}) style = lambda x: pd.Series(["color: red", "color: blue"], name=x.name) s = Styler(df, uuid="AB").apply(style) s.render() @@ -127,7 +127,7 @@ def test_render_empty_dfs(self): # No IndexError raised? def test_render_double(self): - df = pd.DataFrame({"A": [0, 1]}) + df = DataFrame({"A": [0, 1]}) style = lambda x: pd.Series( ["color: red; border: 1px", "color: blue; border: 2px"], name=x.name ) @@ -136,7 +136,7 @@ def test_render_double(self): # it worked? def test_set_properties(self): - df = pd.DataFrame({"A": [0, 1]}) + df = DataFrame({"A": [0, 1]}) result = df.style.set_properties(color="white", size="10px")._compute().ctx # order is deterministic v = ["color: white", "size: 10px"] @@ -146,7 +146,7 @@ def test_set_properties(self): assert sorted(v1) == sorted(v2) def test_set_properties_subset(self): - df = pd.DataFrame({"A": [0, 1]}) + df = DataFrame({"A": [0, 1]}) result = ( df.style.set_properties(subset=pd.IndexSlice[0, "A"], color="white") ._compute() @@ -157,7 +157,7 @@ def test_set_properties_subset(self): def test_empty_index_name_doesnt_display(self): # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 - df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) result = df.style._translate() expected = [ @@ -197,7 +197,7 @@ def test_empty_index_name_doesnt_display(self): def test_index_name(self): # https://github.com/pandas-dev/pandas/issues/11655 - df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) result = df.set_index("A").style._translate() expected = [ @@ -235,7 +235,7 @@ def test_index_name(self): def test_multiindex_name(self): # https://github.com/pandas-dev/pandas/issues/11655 - df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) result = df.set_index(["A", "B"]).style._translate() expected = [ @@ -274,11 +274,11 @@ def test_multiindex_name(self): def test_numeric_columns(self): # https://github.com/pandas-dev/pandas/issues/12125 # smoke test for _translate - df = pd.DataFrame({0: [1, 2, 3]}) + df = DataFrame({0: [1, 2, 3]}) df.style._translate() def test_apply_axis(self): - df = pd.DataFrame({"A": [0, 0], "B": [1, 1]}) + df = DataFrame({"A": [0, 0], "B": [1, 1]}) f = lambda x: [f"val: {x.max()}" for v in x] result = df.style.apply(f, axis=1) assert len(result._todo) == 1 @@ -373,7 +373,7 @@ def color_negative_red(val): } idx = pd.IndexSlice - df = pd.DataFrame(dic, index=[0, 1]) + df = DataFrame(dic, index=[0, 1]) (df.style.applymap(color_negative_red, subset=idx[:, idx["b", "d"]]).render()) @@ -468,7 +468,7 @@ def g(x): assert result == expected def test_empty(self): - df = pd.DataFrame({"A": [1, 0]}) + df = DataFrame({"A": [1, 0]}) s = df.style s.ctx = {(0, 0): ["color: red"], (1, 0): [""]} @@ -480,7 +480,7 @@ def test_empty(self): assert result == expected def test_duplicate(self): - df = pd.DataFrame({"A": [1, 0]}) + df = DataFrame({"A": [1, 0]}) s = df.style s.ctx = {(0, 0): ["color: red"], (1, 0): ["color: red"]} @@ -491,7 +491,7 @@ def test_duplicate(self): assert result == expected def test_bar_align_left(self): - df = pd.DataFrame({"A": [0, 1, 2]}) + df = DataFrame({"A": [0, 1, 2]}) result = df.style.bar()._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -534,7 +534,7 @@ def test_bar_align_left(self): assert result == expected def test_bar_align_left_0points(self): - df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) result = df.style.bar()._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -620,7 +620,7 @@ def test_bar_align_left_0points(self): assert result == expected def test_bar_align_mid_pos_and_neg(self): - df = pd.DataFrame({"A": [-10, 0, 20, 90]}) + df = DataFrame({"A": [-10, 0, 20, 90]}) result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx @@ -652,7 +652,7 @@ def test_bar_align_mid_pos_and_neg(self): assert result == expected def test_bar_align_mid_all_pos(self): - df = pd.DataFrame({"A": [10, 20, 50, 100]}) + df = DataFrame({"A": [10, 20, 50, 100]}) result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx @@ -686,7 +686,7 @@ def test_bar_align_mid_all_pos(self): assert result == expected def test_bar_align_mid_all_neg(self): - df = pd.DataFrame({"A": [-100, -60, -30, -20]}) + df = DataFrame({"A": [-100, -60, -30, -20]}) result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx @@ -726,7 +726,7 @@ def test_bar_align_mid_all_neg(self): def test_bar_align_zero_pos_and_neg(self): # See https://github.com/pandas-dev/pandas/pull/14757 - df = pd.DataFrame({"A": [-10, 0, 20, 90]}) + df = DataFrame({"A": [-10, 0, 20, 90]}) result = ( df.style.bar(align="zero", color=["#d65f5f", "#5fba7d"], width=90) @@ -760,7 +760,7 @@ def test_bar_align_zero_pos_and_neg(self): assert result == expected def test_bar_align_left_axis_none(self): - df = pd.DataFrame({"A": [0, 1], "B": [2, 4]}) + df = DataFrame({"A": [0, 1], "B": [2, 4]}) result = df.style.bar(axis=None)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -786,7 +786,7 @@ def test_bar_align_left_axis_none(self): assert result == expected def test_bar_align_zero_axis_none(self): - df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="zero", axis=None)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -815,7 +815,7 @@ def test_bar_align_zero_axis_none(self): assert result == expected def test_bar_align_mid_axis_none(self): - df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -843,7 +843,7 @@ def test_bar_align_mid_axis_none(self): assert result == expected def test_bar_align_mid_vmin(self): - df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmin=-6)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -872,7 +872,7 @@ def test_bar_align_mid_vmin(self): assert result == expected def test_bar_align_mid_vmax(self): - df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmax=8)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -900,7 +900,7 @@ def test_bar_align_mid_vmax(self): assert result == expected def test_bar_align_mid_vmin_vmax_wide(self): - df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmin=-3, vmax=7)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -929,7 +929,7 @@ def test_bar_align_mid_vmin_vmax_wide(self): assert result == expected def test_bar_align_mid_vmin_vmax_clipping(self): - df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmin=-1, vmax=3)._compute().ctx expected = { (0, 0): ["width: 10em", " height: 80%"], @@ -957,7 +957,7 @@ def test_bar_align_mid_vmin_vmax_clipping(self): assert result == expected def test_bar_align_mid_nans(self): - df = pd.DataFrame({"A": [1, None], "B": [-1, 3]}) + df = DataFrame({"A": [1, None], "B": [-1, 3]}) result = df.style.bar(align="mid", axis=None)._compute().ctx expected = { (0, 0): [ @@ -984,7 +984,7 @@ def test_bar_align_mid_nans(self): assert result == expected def test_bar_align_zero_nans(self): - df = pd.DataFrame({"A": [1, None], "B": [-1, 2]}) + df = DataFrame({"A": [1, None], "B": [-1, 2]}) result = df.style.bar(align="zero", axis=None)._compute().ctx expected = { (0, 0): [ @@ -1012,14 +1012,14 @@ def test_bar_align_zero_nans(self): assert result == expected def test_bar_bad_align_raises(self): - df = pd.DataFrame({"A": [-100, -60, -30, -20]}) + df = DataFrame({"A": [-100, -60, -30, -20]}) msg = "`align` must be one of {'left', 'zero',' mid'}" with pytest.raises(ValueError, match=msg): df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) def test_format_with_na_rep(self): # GH 21527 28358 - df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) ctx = df.style.format(None, na_rep="-")._translate() assert ctx["body"][0][1]["display_value"] == "-" @@ -1037,7 +1037,7 @@ def test_format_with_na_rep(self): def test_init_with_na_rep(self): # GH 21527 28358 - df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) ctx = Styler(df, na_rep="NA")._translate() assert ctx["body"][0][1]["display_value"] == "NA" @@ -1045,7 +1045,7 @@ def test_init_with_na_rep(self): def test_set_na_rep(self): # GH 21527 28358 - df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) ctx = df.style.set_na_rep("NA")._translate() assert ctx["body"][0][1]["display_value"] == "NA" @@ -1061,7 +1061,7 @@ def test_set_na_rep(self): def test_format_non_numeric_na(self): # GH 21527 28358 - df = pd.DataFrame( + df = DataFrame( { "object": [None, np.nan, "foo"], "datetime": [None, pd.NaT, pd.Timestamp("20120101")], @@ -1082,20 +1082,20 @@ def test_format_non_numeric_na(self): def test_format_with_bad_na_rep(self): # GH 21527 28358 - df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) msg = "Expected a string, got -1 instead" with pytest.raises(TypeError, match=msg): df.style.format(None, na_rep=-1) def test_highlight_null(self, null_color="red"): - df = pd.DataFrame({"A": [0, np.nan]}) + df = DataFrame({"A": [0, np.nan]}) result = df.style.highlight_null()._compute().ctx expected = {(1, 0): ["background-color: red"]} assert result == expected def test_highlight_null_subset(self): # GH 31345 - df = pd.DataFrame({"A": [0, np.nan], "B": [0, np.nan]}) + df = DataFrame({"A": [0, np.nan], "B": [0, np.nan]}) result = ( df.style.highlight_null(null_color="red", subset=["A"]) .highlight_null(null_color="green", subset=["B"]) @@ -1109,7 +1109,7 @@ def test_highlight_null_subset(self): assert result == expected def test_nonunique_raises(self): - df = pd.DataFrame([[1, 2]], columns=["A", "A"]) + df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." with pytest.raises(ValueError, match=msg): df.style @@ -1139,7 +1139,7 @@ def test_uuid(self): def test_unique_id(self): # See https://github.com/pandas-dev/pandas/issues/16780 - df = pd.DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) + df = DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) result = df.style.render(uuid="test") assert "test" in result ids = re.findall('id="(.*?)"', result) @@ -1178,13 +1178,13 @@ def test_precision(self): def test_apply_none(self): def f(x): - return pd.DataFrame( + return DataFrame( np.where(x == x.max(), "color: red", ""), index=x.index, columns=x.columns, ) - result = pd.DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx + result = DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx assert result[(1, 1)] == ["color: red"] def test_trim(self): @@ -1195,7 +1195,7 @@ def test_trim(self): assert result.count("#") == len(self.df.columns) def test_highlight_max(self): - df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) # max(df) = min(-df) for max_ in [True, False]: if max_: @@ -1246,7 +1246,7 @@ def test_export(self): style2.render() def test_display_format(self): - df = pd.DataFrame(np.random.random(size=(2, 2))) + df = DataFrame(np.random.random(size=(2, 2))) ctx = df.style.format("{:0.1f}")._translate() assert all(["display_value" in c for c in row] for row in ctx["body"]) @@ -1256,7 +1256,7 @@ def test_display_format(self): assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 def test_display_format_raises(self): - df = pd.DataFrame(np.random.randn(2, 2)) + df = DataFrame(np.random.randn(2, 2)) msg = "Expected a template string or callable, got 5 instead" with pytest.raises(TypeError, match=msg): df.style.format(5) @@ -1267,7 +1267,7 @@ def test_display_format_raises(self): def test_display_set_precision(self): # Issue #13257 - df = pd.DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"]) + df = DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"]) s = Styler(df) ctx = s.set_precision(1)._translate() @@ -1293,7 +1293,7 @@ def test_display_set_precision(self): assert ctx["body"][1][2]["display_value"] == "4.566" def test_display_subset(self): - df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) ctx = df.style.format( {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :] )._translate() @@ -1324,7 +1324,7 @@ def test_display_subset(self): assert ctx["body"][1][2]["display_value"] == raw_11 def test_display_dict(self): - df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"})._translate() assert ctx["body"][0][1]["display_value"] == "0.1" assert ctx["body"][0][2]["display_value"] == "12.34%" @@ -1334,7 +1334,7 @@ def test_display_dict(self): assert ctx["body"][0][3]["display_value"] == "AAA" def test_bad_apply_shape(self): - df = pd.DataFrame([[1, 2], [3, 4]]) + df = DataFrame([[1, 2], [3, 4]]) msg = "returned the wrong shape" with pytest.raises(ValueError, match=msg): df.style._apply(lambda x: "x", subset=pd.IndexSlice[[0, 1], :]) @@ -1356,16 +1356,16 @@ def test_apply_bad_return(self): def f(x): return "" - df = pd.DataFrame([[1, 2], [3, 4]]) + df = DataFrame([[1, 2], [3, 4]]) msg = "must return a DataFrame when passed to `Styler.apply` with axis=None" with pytest.raises(TypeError, match=msg): df.style._apply(f, axis=None) def test_apply_bad_labels(self): def f(x): - return pd.DataFrame(index=[1, 2], columns=["a", "b"]) + return DataFrame(index=[1, 2], columns=["a", "b"]) - df = pd.DataFrame([[1, 2], [3, 4]]) + df = DataFrame([[1, 2], [3, 4]]) msg = "must have identical index and columns as the input" with pytest.raises(ValueError, match=msg): df.style._apply(f, axis=None) @@ -1400,7 +1400,7 @@ def test_get_level_lengths_un_sorted(self): tm.assert_dict_equal(result, expected) def test_mi_sparse(self): - df = pd.DataFrame( + df = DataFrame( {"A": [1, 2]}, index=pd.MultiIndex.from_arrays([["a", "a"], [0, 1]]) ) @@ -1467,7 +1467,7 @@ def test_mi_sparse(self): def test_mi_sparse_disabled(self): with pd.option_context("display.multi_sparse", False): - df = pd.DataFrame( + df = DataFrame( {"A": [1, 2]}, index=pd.MultiIndex.from_arrays([["a", "a"], [0, 1]]) ) result = df.style._translate() @@ -1476,7 +1476,7 @@ def test_mi_sparse_disabled(self): assert "attributes" not in row[0] def test_mi_sparse_index_names(self): - df = pd.DataFrame( + df = DataFrame( {"A": [1, 2]}, index=pd.MultiIndex.from_arrays( [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] @@ -1493,7 +1493,7 @@ def test_mi_sparse_index_names(self): assert head == expected def test_mi_sparse_column_names(self): - df = pd.DataFrame( + df = DataFrame( np.arange(16).reshape(4, 4), index=pd.MultiIndex.from_arrays( [["a", "a", "b", "a"], [0, 1, 1, 2]], @@ -1574,7 +1574,7 @@ def test_hide_single_index(self): def test_hide_multiindex(self): # GH 14194 - df = pd.DataFrame( + df = DataFrame( {"A": [1, 2]}, index=pd.MultiIndex.from_arrays( [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] @@ -1628,7 +1628,7 @@ def test_hide_columns_mult_levels(self): i2 = pd.MultiIndex.from_arrays( [["b", "b"], [0, 1]], names=["col_level_0", "col_level_1"] ) - df = pd.DataFrame([[1, 2], [3, 4]], index=i1, columns=i2) + df = DataFrame([[1, 2], [3, 4]], index=i1, columns=i2) ctx = df.style._translate() # column headers assert ctx["head"][0][2]["is_visible"] @@ -1685,7 +1685,7 @@ def f(a, b, styler): def test_no_cell_ids(self): # GH 35588 # GH 35663 - df = pd.DataFrame(data=[[0]]) + df = DataFrame(data=[[0]]) styler = Styler(df, uuid="_", cell_ids=False) styler.render() s = styler.render() # render twice to ensure ctx is not updated @@ -1714,14 +1714,14 @@ def test_set_data_classes(self, classes): def test_colspan_w3(self): # GH 36223 - df = pd.DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]]) + df = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]]) s = Styler(df, uuid="_", cell_ids=False) assert 'l0' in s.render() @pytest.mark.parametrize("len_", [1, 5, 32, 33, 100]) def test_uuid_len(self, len_): # GH 36345 - df = pd.DataFrame(data=[["A"]]) + df = DataFrame(data=[["A"]]) s = Styler(df, uuid_len=len_, cell_ids=False).render() strt = s.find('id="T_') end = s[strt + 6 :].find('"') @@ -1733,7 +1733,7 @@ def test_uuid_len(self, len_): @pytest.mark.parametrize("len_", [-2, "bad", None]) def test_uuid_len_raises(self, len_): # GH 36345 - df = pd.DataFrame(data=[["A"]]) + df = DataFrame(data=[["A"]]) msg = "``uuid_len`` must be an integer in range \\[0, 32\\]." with pytest.raises(TypeError, match=msg): Styler(df, uuid_len=len_, cell_ids=False).render() @@ -1742,7 +1742,7 @@ def test_uuid_len_raises(self, len_): @td.skip_if_no_mpl class TestStylerMatplotlibDep: def test_background_gradient(self): - df = pd.DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) for c_map in [None, "YlOrRd"]: result = df.style.background_gradient(cmap=c_map)._compute().ctx @@ -1776,13 +1776,13 @@ def test_background_gradient(self): ], ) def test_text_color_threshold(self, c_map, expected): - df = pd.DataFrame([1, 2], columns=["A"]) + df = DataFrame([1, 2], columns=["A"]) result = df.style.background_gradient(cmap=c_map)._compute().ctx assert result == expected @pytest.mark.parametrize("text_color_threshold", [1.1, "1", -1, [2, 2]]) def test_text_color_threshold_raises(self, text_color_threshold): - df = pd.DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) msg = "`text_color_threshold` must be a value from 0 to 1." with pytest.raises(ValueError, match=msg): df.style.background_gradient( @@ -1791,7 +1791,7 @@ def test_text_color_threshold_raises(self, text_color_threshold): @td.skip_if_no_mpl def test_background_gradient_axis(self): - df = pd.DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) low = ["background-color: #f7fbff", "color: #000000"] high = ["background-color: #08306b", "color: #f1f1f1"] @@ -1816,7 +1816,7 @@ def test_background_gradient_axis(self): def test_background_gradient_vmin_vmax(self): # GH 12145 - df = pd.DataFrame(range(5)) + df = DataFrame(range(5)) ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx assert ctx[(0, 0)] == ctx[(1, 0)] assert ctx[(4, 0)] == ctx[(3, 0)] @@ -1873,5 +1873,5 @@ def test_from_custom_template(tmpdir): assert issubclass(result, Styler) assert result.env is not Styler.env assert result.template is not Styler.template - styler = result(pd.DataFrame({"A": [1, 2]})) + styler = result(DataFrame({"A": [1, 2]})) assert styler.render() diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index e2ceb95d77053..3584ec047d4d2 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -150,7 +150,7 @@ def test_to_csv_decimal(self): ) # see gh-11553: testing if decimal is taken into account for '0.0' - df = pd.DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1}) + df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1}) expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) @@ -165,7 +165,7 @@ def test_to_csv_decimal(self): def test_to_csv_float_format(self): # testing if float_format is taken into account for the index # GH 11553 - df = pd.DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) + df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) @@ -334,7 +334,7 @@ def test_to_csv_single_level_multi_index(self, ind, expected, klass): def test_to_csv_string_array_ascii(self): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] - df = pd.DataFrame(str_array) + df = DataFrame(str_array) expected_ascii = """\ ,names 0,"['foo', 'bar']" @@ -348,7 +348,7 @@ def test_to_csv_string_array_ascii(self): def test_to_csv_string_array_utf8(self): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] - df = pd.DataFrame(str_array) + df = DataFrame(str_array) expected_utf8 = """\ ,names 0,"['foo', 'bar']" @@ -362,7 +362,7 @@ def test_to_csv_string_array_utf8(self): def test_to_csv_string_with_lf(self): # GH 20353 data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} - df = pd.DataFrame(data) + df = DataFrame(data) with tm.ensure_clean("lf_test.csv") as path: # case 1: The default line terminator(=os.linesep)(PR 21406) os_linesep = os.linesep.encode("utf-8") @@ -396,7 +396,7 @@ def test_to_csv_string_with_lf(self): def test_to_csv_string_with_crlf(self): # GH 20353 data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} - df = pd.DataFrame(data) + df = DataFrame(data) with tm.ensure_clean("crlf_test.csv") as path: # case 1: The default line terminator(=os.linesep)(PR 21406) os_linesep = os.linesep.encode("utf-8") @@ -434,9 +434,7 @@ def test_to_csv_string_with_crlf(self): def test_to_csv_stdout_file(self, capsys): # GH 21561 - df = pd.DataFrame( - [["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"] - ) + df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]) expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"] expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) @@ -456,7 +454,7 @@ def test_to_csv_stdout_file(self, capsys): ) def test_to_csv_write_to_open_file(self): # GH 21696 - df = pd.DataFrame({"a": ["x", "y", "z"]}) + df = DataFrame({"a": ["x", "y", "z"]}) expected = """\ manual header x @@ -473,7 +471,7 @@ def test_to_csv_write_to_open_file(self): def test_to_csv_write_to_open_file_with_newline_py3(self): # see gh-21696 # see gh-20353 - df = pd.DataFrame({"a": ["x", "y", "z"]}) + df = DataFrame({"a": ["x", "y", "z"]}) expected_rows = ["x", "y", "z"] expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) with tm.ensure_clean("test.txt") as path: @@ -557,7 +555,7 @@ def test_to_csv_zip_arguments(self, compression, archive_name): @pytest.mark.parametrize("df_new_type", ["Int64"]) def test_to_csv_na_rep_long_string(self, df_new_type): # see gh-25099 - df = pd.DataFrame({"c": [float("nan")] * 3}) + df = DataFrame({"c": [float("nan")] * 3}) df = df.astype(df_new_type) expected_rows = ["c", "mynull", "mynull", "mynull"] expected = tm.convert_rows_list_to_csv_str(expected_rows) @@ -635,7 +633,7 @@ def test_to_csv_encoding_binary_handle(self): # example from GH 13068 with tm.ensure_clean() as path: with open(path, "w+b") as handle: - pd.DataFrame().to_csv(handle, mode="w+b", encoding="utf-8-sig") + DataFrame().to_csv(handle, mode="w+b", encoding="utf-8-sig") handle.seek(0) assert handle.read().startswith(b'\xef\xbb\xbf""') diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 7acdbfd462874..18cbd7186e931 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -787,13 +787,13 @@ def test_html_repr_min_rows_default(datapath): # gh-27991 # default setting no truncation even if above min_rows - df = pd.DataFrame({"a": range(20)}) + df = DataFrame({"a": range(20)}) result = df._repr_html_() expected = expected_html(datapath, "html_repr_min_rows_default_no_truncation") assert result == expected # default of max_rows 60 triggers truncation if above - df = pd.DataFrame({"a": range(61)}) + df = DataFrame({"a": range(61)}) result = df._repr_html_() expected = expected_html(datapath, "html_repr_min_rows_default_truncated") assert result == expected @@ -815,7 +815,7 @@ def test_html_repr_min_rows_default(datapath): def test_html_repr_min_rows(datapath, max_rows, min_rows, expected): # gh-27991 - df = pd.DataFrame({"a": range(61)}) + df = DataFrame({"a": range(61)}) expected = expected_html(datapath, expected) with option_context("display.max_rows", max_rows, "display.min_rows", min_rows): result = df._repr_html_() diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index b2edb5309f299..855e69dee7db4 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -157,7 +157,7 @@ def test_to_latex_series(self): def test_to_latex_midrule_location(self): # GH 18326 - df = pd.DataFrame({"a": [1, 2]}) + df = DataFrame({"a": [1, 2]}) df.index.name = "foo" result = df.to_latex(index_names=False) expected = _dedent( @@ -373,7 +373,7 @@ def test_to_latex_decimal(self): class TestToLatexBold: def test_to_latex_bold_rows(self): # GH 16707 - df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) result = df.to_latex(bold_rows=True) expected = _dedent( r""" @@ -391,7 +391,7 @@ def test_to_latex_bold_rows(self): def test_to_latex_no_bold_rows(self): # GH 16707 - df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) result = df.to_latex(bold_rows=False) expected = _dedent( r""" @@ -572,7 +572,7 @@ def test_to_latex_caption_shortcaption_and_label( ) def test_to_latex_bad_caption_raises(self, bad_caption): # test that wrong number of params is raised - df = pd.DataFrame({"a": [1]}) + df = DataFrame({"a": [1]}) msg = "caption must be either a string or a tuple of two strings" with pytest.raises(ValueError, match=msg): df.to_latex(caption=bad_caption) @@ -990,7 +990,7 @@ def multiindex_frame(self): @pytest.fixture def multicolumn_frame(self): """Multicolumn dataframe for testing multicolumn LaTeX macros.""" - yield pd.DataFrame( + yield DataFrame( { ("c1", 0): {x: x for x in range(5)}, ("c1", 1): {x: x + 5 for x in range(5)}, @@ -1002,7 +1002,7 @@ def multicolumn_frame(self): def test_to_latex_multindex_header(self): # GH 16718 - df = pd.DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}) + df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}) df = df.set_index(["a", "b"]) observed = df.to_latex(header=["r1", "r2"]) expected = _dedent( @@ -1022,7 +1022,7 @@ def test_to_latex_multindex_header(self): def test_to_latex_multiindex_empty_name(self): # GH 18669 mi = pd.MultiIndex.from_product([[1, 2]], names=[""]) - df = pd.DataFrame(-1, index=mi, columns=range(4)) + df = DataFrame(-1, index=mi, columns=range(4)) observed = df.to_latex() expected = _dedent( r""" @@ -1115,7 +1115,7 @@ def test_to_latex_multicolumn_tabular(self, multiindex_frame): def test_to_latex_index_has_name_tabular(self): # GH 10660 - df = pd.DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) + df = DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) result = df.set_index(["a", "b"]).to_latex() expected = _dedent( r""" @@ -1136,7 +1136,7 @@ def test_to_latex_index_has_name_tabular(self): def test_to_latex_groupby_tabular(self): # GH 10660 - df = pd.DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) + df = DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) result = df.groupby("a").describe().to_latex() expected = _dedent( r""" @@ -1162,7 +1162,7 @@ def test_to_latex_multiindex_dupe_level(self): # ONLY happen if all higher order indices (to the left) are # equal too. In this test, 'c' has to be printed both times # because the higher order index 'A' != 'B'. - df = pd.DataFrame( + df = DataFrame( index=pd.MultiIndex.from_tuples([("A", "c"), ("B", "c")]), columns=["col"] ) result = df.to_latex() @@ -1275,7 +1275,7 @@ def test_to_latex_multiindex_names(self, name0, name1, axes): # GH 18667 names = [name0, name1] mi = pd.MultiIndex.from_product([[1, 2], [3, 4]]) - df = pd.DataFrame(-1, index=mi.copy(), columns=mi.copy()) + df = DataFrame(-1, index=mi.copy(), columns=mi.copy()) for idx in axes: df.axes[idx].names = names @@ -1307,7 +1307,7 @@ def test_to_latex_multiindex_names(self, name0, name1, axes): @pytest.mark.parametrize("one_row", [True, False]) def test_to_latex_multiindex_nans(self, one_row): # GH 14249 - df = pd.DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]}) + df = DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]}) if one_row: df = df.iloc[[0]] observed = df.set_index(["a", "b"]).to_latex() @@ -1331,7 +1331,7 @@ def test_to_latex_multiindex_nans(self, one_row): def test_to_latex_non_string_index(self): # GH 19981 - df = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]) + df = DataFrame([[1, 2, 3]] * 2).set_index([0, 1]) result = df.to_latex() expected = _dedent( r""" diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 8f1ed193b100f..71698a02285f9 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -240,7 +240,7 @@ def test_build_series(self): def test_read_json_from_to_json_results(self): # GH32383 - df = pd.DataFrame( + df = DataFrame( { "_id": {"row_0": 0}, "category": {"row_0": "Goods"}, @@ -616,13 +616,13 @@ def test_set_names_unset(self, idx, nm, prop): ) def test_warns_non_roundtrippable_names(self, idx): # GH 19130 - df = pd.DataFrame(index=idx) + df = DataFrame(index=idx) df.index.name = "index" with tm.assert_produces_warning(): set_default_names(df) def test_timestamp_in_columns(self): - df = pd.DataFrame( + df = DataFrame( [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")] ) result = df.to_json(orient="table") @@ -634,8 +634,8 @@ def test_timestamp_in_columns(self): "case", [ pd.Series([1], index=pd.Index([1], name="a"), name="a"), - pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")), - pd.DataFrame( + DataFrame({"A": [1]}, index=pd.Index([1], name="A")), + DataFrame( {"A": [1]}, index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]), ), @@ -647,7 +647,7 @@ def test_overlapping_names(self, case): def test_mi_falsey_name(self): # GH 16203 - df = pd.DataFrame( + df = DataFrame( np.random.randn(4, 4), index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]), ) @@ -730,7 +730,7 @@ def test_comprehensive(self): ) def test_multiindex(self, index_names): # GH 18912 - df = pd.DataFrame( + df = DataFrame( [["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]]], index=[["A", "B"], ["Null", "Eins"]], columns=["Aussprache", "Griechisch", "Args"], @@ -742,7 +742,7 @@ def test_multiindex(self, index_names): def test_empty_frame_roundtrip(self): # GH 21287 - df = pd.DataFrame(columns=["a", "b", "c"]) + df = DataFrame(columns=["a", "b", "c"]) expected = df.copy() out = df.to_json(orient="table") result = pd.read_json(out, orient="table") diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4d8d4ecb50a5a..92cc0f969ec87 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -106,7 +106,7 @@ def test_frame_non_unique_columns(self, orient, data): df.to_json(orient=orient), orient=orient, convert_dates=["x"] ) if orient == "values": - expected = pd.DataFrame(data) + expected = DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": # orient == "values" by default will write Timestamp objects out # in milliseconds; these are internally stored in nanosecond, @@ -373,7 +373,7 @@ def test_frame_infinity(self, orient, inf, dtype): ], ) def test_frame_to_json_float_precision(self, value, precision, expected_val): - df = pd.DataFrame([dict(a_float=value)]) + df = DataFrame([dict(a_float=value)]) encoded = df.to_json(double_precision=precision) assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}' @@ -390,7 +390,7 @@ def test_frame_empty(self): read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False ) # GH 7445 - result = pd.DataFrame({"test": []}, index=[]).to_json(orient="columns") + result = DataFrame({"test": []}, index=[]).to_json(orient="columns") expected = '{"test":{}}' assert result == expected @@ -599,7 +599,7 @@ def __str__(self) -> str: def test_label_overflow(self): # GH14256: buffer length not checked when writing label - result = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json() + result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json() expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}' assert result == expected @@ -1143,7 +1143,7 @@ def test_datetime_tz(self): def test_sparse(self): # GH4377 df.to_json segfaults with non-ndarray blocks - df = pd.DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.randn(10, 4)) df.loc[:8] = np.nan sdf = df.astype("Sparse") @@ -1366,7 +1366,7 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns): def test_from_json_to_json_table_dtypes(self): # GH21345 - expected = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) + expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) dfjson = expected.to_json(orient="table") result = pd.read_json(dfjson, orient="table") tm.assert_frame_equal(result, expected) @@ -1374,7 +1374,7 @@ def test_from_json_to_json_table_dtypes(self): @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) def test_read_json_table_dtype_raises(self, dtype): # GH21345 - df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) + df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) dfjson = df.to_json(orient="table") msg = "cannot pass both dtype and orient='table'" with pytest.raises(ValueError, match=msg): @@ -1459,7 +1459,7 @@ def test_index_false_error_to_json(self, orient): # GH 17394 # Testing error message from to_json with index=False - df = pd.DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) + df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) msg = "'index=False' is only valid when 'orient' is 'split' or 'table'" with pytest.raises(ValueError, match=msg): @@ -1487,7 +1487,7 @@ def test_read_timezone_information(self): "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")] ) def test_timedelta_as_label(self, date_format, key): - df = pd.DataFrame([[1]], columns=[pd.Timedelta("1D")]) + df = DataFrame([[1]], columns=[pd.Timedelta("1D")]) expected = f'{{"{key}":{{"0":1}}}}' result = df.to_json(date_format=date_format) @@ -1506,14 +1506,14 @@ def test_timedelta_as_label(self, date_format, key): ) def test_tuple_labels(self, orient, expected): # GH 20500 - df = pd.DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")]) + df = DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")]) result = df.to_json(orient=orient) assert result == expected @pytest.mark.parametrize("indent", [1, 2, 4]) def test_to_json_indent(self, indent): # GH 12004 - df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) result = df.to_json(indent=indent) spaces = " " * indent @@ -1649,19 +1649,19 @@ def test_to_json_indent(self, indent): ) def test_json_indent_all_orients(self, orient, expected): # GH 12004 - df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) result = df.to_json(orient=orient, indent=4) assert result == expected def test_json_negative_indent_raises(self): with pytest.raises(ValueError, match="must be a nonnegative integer"): - pd.DataFrame().to_json(indent=-1) + DataFrame().to_json(indent=-1) def test_emca_262_nan_inf_support(self): # GH 12213 data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' result = pd.read_json(data) - expected = pd.DataFrame( + expected = DataFrame( ["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] ) tm.assert_frame_equal(result, expected) @@ -1684,7 +1684,7 @@ def test_frame_int_overflow(self): "dataframe,expected", [ ( - pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}), + DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}), '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,' '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}', ) @@ -1719,12 +1719,12 @@ def test_to_s3(self, s3_resource, s3so): def test_json_pandas_na(self): # GH 31615 - result = pd.DataFrame([[pd.NA]]).to_json() + result = DataFrame([[pd.NA]]).to_json() assert result == '{"0":{"0":null}}' def test_json_pandas_nulls(self, nulls_fixture): # GH 31615 - result = pd.DataFrame([[nulls_fixture]]).to_json() + result = DataFrame([[nulls_fixture]]).to_json() assert result == '{"0":{"0":null}}' def test_readjson_bool_series(self): diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index a6ffa7e97d375..933bdc462e3f8 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -12,7 +12,7 @@ @pytest.fixture def lines_json_df(): - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) return df.to_json(lines=True, orient="records") @@ -112,7 +112,7 @@ def test_readjson_each_chunk(lines_json_df): def test_readjson_chunks_from_file(): with tm.ensure_clean("test.json") as path: - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) df.to_json(path, lines=True, orient="records") chunked = pd.concat(pd.read_json(path, lines=True, chunksize=1)) unchunked = pd.read_json(path, lines=True) @@ -122,7 +122,7 @@ def test_readjson_chunks_from_file(): @pytest.mark.parametrize("chunksize", [None, 1]) def test_readjson_chunks_closes(chunksize): with tm.ensure_clean("test.json") as path: - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) df.to_json(path, lines=True, orient="records") reader = JsonReader( path, @@ -173,7 +173,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): {"A":3,"B":6} """ - orig = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) test = pd.read_json(j, lines=True, chunksize=chunksize) if chunksize is not None: test = pd.concat(test) @@ -187,7 +187,7 @@ def test_readjson_unicode(monkeypatch): f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}') result = read_json(path) - expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) + expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) tm.assert_frame_equal(result, expected) @@ -200,7 +200,7 @@ def test_readjson_nrows(nrows): {"a": 5, "b": 6} {"a": 7, "b": 8}""" result = pd.read_json(jsonl, lines=True, nrows=nrows) - expected = pd.DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] + expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(result, expected) @@ -214,7 +214,7 @@ def test_readjson_nrows_chunks(nrows, chunksize): {"a": 7, "b": 8}""" reader = read_json(jsonl, lines=True, nrows=nrows, chunksize=chunksize) chunked = pd.concat(reader) - expected = pd.DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] + expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(chunked, expected) @@ -234,9 +234,9 @@ def test_readjson_lines_chunks_fileurl(datapath): # GH 27135 # Test reading line-format JSON from file url df_list_expected = [ - pd.DataFrame([[1, 2]], columns=["a", "b"], index=[0]), - pd.DataFrame([[3, 4]], columns=["a", "b"], index=[1]), - pd.DataFrame([[5, 6]], columns=["a", "b"], index=[2]), + DataFrame([[1, 2]], columns=["a", "b"], index=[0]), + DataFrame([[3, 4]], columns=["a", "b"], index=[1]), + DataFrame([[5, 6]], columns=["a", "b"], index=[2]), ] os_path = datapath("io", "json", "data", "line_delimited.json") file_url = Path(os_path).as_uri() diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 6ac310e3b2227..861aeba60cab7 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -577,7 +577,7 @@ def test_boolean_dtype(all_parsers): ) result = parser.read_csv(StringIO(data), dtype="boolean") - expected = pd.DataFrame( + expected = DataFrame( { "a": pd.array( [ diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index d45317aaa3458..4796cf0b79fae 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -509,7 +509,7 @@ def test_dtype(dtype): colspecs = [(0, 5), (5, 10), (10, None)] result = read_fwf(StringIO(data), colspecs=colspecs, dtype=dtype) - expected = pd.DataFrame( + expected = DataFrame( {"a": [1, 3], "b": [2, 4], "c": [3.2, 5.2]}, columns=["a", "b", "c"] ) @@ -625,7 +625,7 @@ def test_binary_mode(): """ data = """aas aas aas bba bab b a""" - df_reference = pd.DataFrame( + df_reference = DataFrame( [["bba", "bab", "b a"]], columns=["aas", "aas.1", "aas.2"], index=[0] ) with tm.ensure_clean() as path: @@ -653,5 +653,5 @@ def test_encoding_mmap(memory_map): memory_map=memory_map, ) data.seek(0) - df_reference = pd.DataFrame([[1, "A", "Ä", 2]]) + df_reference = DataFrame([[1, "A", "Ä", 2]]) tm.assert_frame_equal(df, df_reference) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 7eeba97b799ae..ba2805f2f063f 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -64,7 +64,7 @@ @pytest.mark.single class TestHDFStore: def test_format_type(self, setup_path): - df = pd.DataFrame({"A": [1, 2]}) + df = DataFrame({"A": [1, 2]}) with ensure_clean_path(setup_path) as path: with HDFStore(path) as store: store.put("a", df, format="fixed") @@ -300,7 +300,7 @@ def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128): def create_h5_and_return_checksum(track_times): with ensure_clean_path(setup_path) as path: - df = pd.DataFrame({"a": [1]}) + df = DataFrame({"a": [1]}) with pd.HDFStore(path, mode="w") as hdf: hdf.put( @@ -657,10 +657,10 @@ def test_get(self, setup_path): def test_walk(self, where, expected, setup_path): # GH10143 objs = { - "df1": pd.DataFrame([1, 2, 3]), - "df2": pd.DataFrame([4, 5, 6]), - "df3": pd.DataFrame([6, 7, 8]), - "df4": pd.DataFrame([9, 10, 11]), + "df1": DataFrame([1, 2, 3]), + "df2": DataFrame([4, 5, 6]), + "df3": DataFrame([6, 7, 8]), + "df4": DataFrame([9, 10, 11]), "s1": Series([10, 9, 8]), # Next 3 items aren't pandas objects and should be ignored "a1": np.array([[1, 2, 3], [4, 5, 6]]), @@ -1267,7 +1267,7 @@ def test_append_all_nans(self, setup_path): def test_read_missing_key_close_store(self, setup_path): # GH 25766 with ensure_clean_path(setup_path) as path: - df = pd.DataFrame({"a": range(2), "b": range(2)}) + df = DataFrame({"a": range(2), "b": range(2)}) df.to_hdf(path, "k1") with pytest.raises(KeyError, match="'No object named k2 in the file'"): @@ -1280,7 +1280,7 @@ def test_read_missing_key_close_store(self, setup_path): def test_read_missing_key_opened_store(self, setup_path): # GH 28699 with ensure_clean_path(setup_path) as path: - df = pd.DataFrame({"a": range(2), "b": range(2)}) + df = DataFrame({"a": range(2), "b": range(2)}) df.to_hdf(path, "k1") with pd.HDFStore(path, "r") as store: @@ -1921,7 +1921,7 @@ def test_mi_data_columns(self, setup_path): idx = pd.MultiIndex.from_arrays( [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"] ) - df = pd.DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx) + df = DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx) with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=True) @@ -2541,7 +2541,7 @@ def test_store_index_name_numpy_str(self, table_format, setup_path): pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]), name="rows\u05d0", ) - df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) + df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) # This used to fail, returning numpy strings instead of python strings. with ensure_clean_path(setup_path) as path: @@ -3683,7 +3683,7 @@ def test_append_to_multiple_dropna_false(self, setup_path): def test_append_to_multiple_min_itemsize(self, setup_path): # GH 11238 - df = pd.DataFrame( + df = DataFrame( { "IX": np.arange(1, 21), "Num": np.arange(1, 21), @@ -4136,7 +4136,7 @@ def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path): datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r" ) as store: result = store.select("df") - expected = pd.DataFrame( + expected = DataFrame( [[1, 2, 3, "D"]], columns=["A", "B", "C", "D"], index=pd.Index(["ABC"], name="INDEX_NAME"), @@ -4151,7 +4151,7 @@ def test_legacy_table_fixed_format_read_datetime_py2(self, datapath, setup_path) mode="r", ) as store: result = store.select("df") - expected = pd.DataFrame( + expected = DataFrame( [[pd.Timestamp("2020-02-06T18:00")]], columns=["A"], index=pd.Index(["date"]), @@ -4166,7 +4166,7 @@ def test_legacy_table_read_py2(self, datapath, setup_path): ) as store: result = store.select("table") - expected = pd.DataFrame({"a": ["a", "b"], "b": [2, 3]}) + expected = DataFrame({"a": ["a", "b"], "b": [2, 3]}) tm.assert_frame_equal(expected, result) def test_copy(self, setup_path): @@ -4286,13 +4286,13 @@ def test_unicode_index(self, setup_path): def test_unicode_longer_encoded(self, setup_path): # GH 11234 char = "\u0394" - df = pd.DataFrame({"A": [char]}) + df = DataFrame({"A": [char]}) with ensure_clean_store(setup_path) as store: store.put("df", df, format="table", encoding="utf-8") result = store.get("df") tm.assert_frame_equal(result, df) - df = pd.DataFrame({"A": ["a", char], "B": ["b", "b"]}) + df = DataFrame({"A": ["a", char], "B": ["b", "b"]}) with ensure_clean_store(setup_path) as store: store.put("df", df, format="table", encoding="utf-8") result = store.get("df") @@ -4497,7 +4497,7 @@ def test_categorical_nan_only_columns(self, setup_path): # GH18413 # Check that read_hdf with categorical columns with NaN-only values can # be read back. - df = pd.DataFrame( + df = DataFrame( { "a": ["a", "b", "c", np.nan], "b": [np.nan, np.nan, np.nan, np.nan], @@ -4734,7 +4734,7 @@ def test_read_from_py_localpath(self, setup_path): def test_query_long_float_literal(self, setup_path): # GH 14241 - df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) + df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) with ensure_clean_store(setup_path) as store: store.append("test", df, format="table", data_columns=True) @@ -4755,7 +4755,7 @@ def test_query_long_float_literal(self, setup_path): def test_query_compare_column_type(self, setup_path): # GH 15492 - df = pd.DataFrame( + df = DataFrame( { "date": ["2014-01-01", "2014-01-02"], "real_date": date_range("2014-01-01", periods=2), @@ -4824,11 +4824,11 @@ def test_read_py2_hdf_file_in_py3(self, datapath): # the file was generated in Python 2.7 like so: # - # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex( + # df = DataFrame([1.,2,3], index=pd.PeriodIndex( # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') - expected = pd.DataFrame( + expected = DataFrame( [1.0, 2, 3], index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"), ) @@ -4850,7 +4850,7 @@ def test_select_empty_where(self, where): # while reading from HDF store raises # "SyntaxError: only a single expression is allowed" - df = pd.DataFrame([1, 2, 3]) + df = DataFrame([1, 2, 3]) with ensure_clean_path("empty_where.h5") as path: with pd.HDFStore(path) as store: store.put("df", df, "t") @@ -4867,7 +4867,7 @@ def test_select_empty_where(self, where): def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path): # GH 7775 mi = MultiIndex.from_arrays([idx, idx]) - df = pd.DataFrame(0, index=mi, columns=["a"]) + df = DataFrame(0, index=mi, columns=["a"]) with ensure_clean_path(setup_path) as path: with pytest.raises(NotImplementedError, match="Saving a MultiIndex"): df.to_hdf(path, "df") diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index bcc5dcf9f5181..e137bc2dca48e 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -213,7 +213,7 @@ def test_append_with_timezones_pytz(setup_path): def test_roundtrip_tz_aware_index(setup_path): # GH 17618 time = pd.Timestamp("2000-01-01 01:00:00", tz="US/Eastern") - df = pd.DataFrame(data=[0], index=[time]) + df = DataFrame(data=[0], index=[time]) with ensure_clean_store(setup_path) as store: store.put("frame", df, format="fixed") @@ -224,7 +224,7 @@ def test_roundtrip_tz_aware_index(setup_path): def test_store_index_name_with_tz(setup_path): # GH 13884 - df = pd.DataFrame({"A": [1, 2]}) + df = DataFrame({"A": [1, 2]}) df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788]) df.index = df.index.tz_localize("UTC") df.index.name = "foo" @@ -387,7 +387,7 @@ def test_read_with_where_tz_aware_index(setup_path): periods = 10 dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC") mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"]) - expected = pd.DataFrame({"MYCOL": 0}, index=mi) + expected = DataFrame({"MYCOL": 0}, index=mi) key = "mykey" with ensure_clean_path(setup_path) as path: diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index b627e0e1cad54..a454d3b855cdf 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -38,11 +38,11 @@ def df(request): data_type = request.param if data_type == "delims": - return pd.DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]}) + return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]}) elif data_type == "utf8": - return pd.DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]}) + return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]}) elif data_type == "utf16": - return pd.DataFrame( + return DataFrame( {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} ) elif data_type == "string": @@ -61,7 +61,7 @@ def df(request): r_idx_names=[None], ) elif data_type == "nonascii": - return pd.DataFrame({"en": "in English".split(), "es": "en español".split()}) + return DataFrame({"en": "in English".split(), "es": "en español".split()}) elif data_type == "colwidth": _cw = get_option("display.max_colwidth") + 1 return tm.makeCustomDataframe( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 32a15e6201037..d6506d434d6a7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1490,10 +1490,10 @@ def test_datetime_with_timezone_roundtrip(self): def test_out_of_bounds_datetime(self): # GH 26761 - data = pd.DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) + data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) data.to_sql("test_datetime_obb", self.conn, index=False) result = sql.read_sql_table("test_datetime_obb", self.conn) - expected = pd.DataFrame([pd.NaT], columns=["date"]) + expected = DataFrame([pd.NaT], columns=["date"]) tm.assert_frame_equal(result, expected) def test_naive_datetimeindex_roundtrip(self): @@ -1820,7 +1820,7 @@ def main(connectable): def test_to_sql_with_negative_npinf(self, input): # GH 34431 - df = pd.DataFrame(input) + df = DataFrame(input) if self.flavor == "mysql": msg = "inf cannot be used with MySQL" diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 30926b2bd0241..d5c2ac755ee4d 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -32,7 +32,7 @@ @pytest.fixture() def mixed_frame(): - return pd.DataFrame( + return DataFrame( { "a": [1, 2, 3, 4], "b": [1.0, 3.0, 27.0, 81.0], @@ -385,7 +385,7 @@ def test_stata_doc_examples(self): def test_write_preserves_original(self): # 9795 np.random.seed(423) - df = pd.DataFrame(np.random.randn(5, 4), columns=list("abcd")) + df = DataFrame(np.random.randn(5, 4), columns=list("abcd")) df.loc[2, "a":"c"] = np.nan df_copy = df.copy() with tm.ensure_clean() as path: @@ -636,7 +636,7 @@ def test_105(self): dpath = os.path.join(self.dirpath, "S4_EDUC1.dta") df = pd.read_stata(dpath) df0 = [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]] - df0 = pd.DataFrame(df0) + df0 = DataFrame(df0) df0.columns = ["clustnum", "pri_schl", "psch_num", "psch_dis"] df0["clustnum"] = df0["clustnum"].astype(np.int16) df0["pri_schl"] = df0["pri_schl"].astype(np.int8) @@ -1358,7 +1358,7 @@ def test_default_date_conversion(self): dt.datetime(2012, 12, 21, 12, 21, 12, 21000), dt.datetime(1776, 7, 4, 7, 4, 7, 4000), ] - original = pd.DataFrame( + original = DataFrame( { "nums": [1.0, 2.0, 3.0], "strs": ["apple", "banana", "cherry"], @@ -1381,7 +1381,7 @@ def test_default_date_conversion(self): tm.assert_frame_equal(reread, direct) def test_unsupported_type(self): - original = pd.DataFrame({"a": [1 + 2j, 2 + 4j]}) + original = DataFrame({"a": [1 + 2j, 2 + 4j]}) msg = "Data type complex128 not supported" with pytest.raises(NotImplementedError, match=msg): @@ -1394,7 +1394,7 @@ def test_unsupported_datetype(self): dt.datetime(2012, 12, 21, 12, 21, 12, 21000), dt.datetime(1776, 7, 4, 7, 4, 7, 4000), ] - original = pd.DataFrame( + original = DataFrame( { "nums": [1.0, 2.0, 3.0], "strs": ["apple", "banana", "cherry"], @@ -1408,7 +1408,7 @@ def test_unsupported_datetype(self): original.to_stata(path, convert_dates={"dates": "tC"}) dates = pd.date_range("1-1-1990", periods=3, tz="Asia/Hong_Kong") - original = pd.DataFrame( + original = DataFrame( { "nums": [1.0, 2.0, 3.0], "strs": ["apple", "banana", "cherry"], @@ -1439,7 +1439,7 @@ def test_stata_111(self): # SAS when exporting to Stata format. We do not know of any # on-line documentation for this version. df = read_stata(self.dta24_111) - original = pd.DataFrame( + original = DataFrame( { "y": [1, 1, 1, 1, 1, 0, 0, np.NaN, 0, 0], "x": [1, 2, 1, 3, np.NaN, 4, 3, 5, 1, 6], @@ -1527,7 +1527,7 @@ def test_pickle_path_localpath(self): def test_value_labels_iterator(self, write_index): # GH 16923 d = {"A": ["B", "E", "C", "A", "E"]} - df = pd.DataFrame(data=d) + df = DataFrame(data=d) df["A"] = df["A"].astype("category") with tm.ensure_clean() as path: df.to_stata(path, write_index=write_index) @@ -1658,7 +1658,7 @@ def test_invalid_date_conversion(self): dt.datetime(2012, 12, 21, 12, 21, 12, 21000), dt.datetime(1776, 7, 4, 7, 4, 7, 4000), ] - original = pd.DataFrame( + original = DataFrame( { "nums": [1.0, 2.0, 3.0], "strs": ["apple", "banana", "cherry"], @@ -1709,14 +1709,14 @@ def test_unicode_dta_118(self): ["", "", "s", "", "s"], ["", "", " ", "", " "], ] - expected = pd.DataFrame(values, columns=columns) + expected = DataFrame(values, columns=columns) tm.assert_frame_equal(unicode_df, expected) def test_mixed_string_strl(self): # GH 23633 output = [{"mixed": "string" * 500, "number": 0}, {"mixed": None, "number": 1}] - output = pd.DataFrame(output) + output = DataFrame(output) output.number = output.number.astype("int32") with tm.ensure_clean() as path: @@ -1737,7 +1737,7 @@ def test_mixed_string_strl(self): @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_all_none_exception(self, version): output = [{"none": "none", "number": 0}, {"none": None, "number": 1}] - output = pd.DataFrame(output) + output = DataFrame(output) output.loc[:, "none"] = None with tm.ensure_clean() as path: with pytest.raises(ValueError, match="Column `none` cannot be exported"): @@ -1791,7 +1791,7 @@ def test_encoding_latin1_118(self): assert len(w) == 151 assert w[0].message.args[0] == msg - expected = pd.DataFrame([["Düsseldorf"]] * 151, columns=["kreis1849"]) + expected = DataFrame([["Düsseldorf"]] * 151, columns=["kreis1849"]) tm.assert_frame_equal(encoded, expected) @pytest.mark.slow @@ -1808,7 +1808,7 @@ def test_stata_119(self): @pytest.mark.parametrize("version", [118, 119, None]) def test_utf8_writer(self, version): cat = pd.Categorical(["a", "β", "ĉ"], ordered=True) - data = pd.DataFrame( + data = DataFrame( [ [1.0, 1, "ᴬ", "ᴀ relatively long ŝtring"], [2.0, 2, "ᴮ", ""], diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index e666a8e412a52..ba59fc1a3cc3f 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -617,7 +617,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): pd.to_datetime("2017-08-02 00:00:00"), ], } - testdata = pd.DataFrame(data) + testdata = DataFrame(data) ax_period = testdata.plot(x="numeric", y="period") assert ( ax_period.get_lines()[0].get_data()[1] == testdata["period"].values @@ -987,7 +987,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -1149,13 +1149,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = pd.DataFrame( + df1 = DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = pd.DataFrame( + df2 = DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -1198,7 +1198,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = pd.DataFrame(np.random.randn(10), columns=["a"]) + df = DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -1209,19 +1209,19 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = pd.DataFrame({"dates": dates, "vals": vals}) + df = DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) @@ -1232,7 +1232,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -1255,7 +1255,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): # interfere with x-axis label and ticklabels with # ipython inline backend. random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -1267,7 +1267,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -1284,9 +1284,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = pd.DataFrame( - {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} - ) + df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) _check_plot_works(df.plot.scatter, x=x, y=y) @@ -1345,7 +1343,7 @@ def test_plot_scatter_with_c(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -1383,7 +1381,7 @@ def test_scatter_colorbar_different_cmap(self): # GH 33389 import matplotlib.pyplot as plt - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) df["x2"] = df["x"] + 1 fig, ax = plt.subplots() @@ -1750,7 +1748,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1991,9 +1989,7 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = pd.DataFrame( - data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] - ) + df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) @@ -2125,7 +2121,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -3253,7 +3249,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -3261,14 +3257,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -3284,7 +3280,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = pd.DataFrame( + df = DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -3305,7 +3301,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -3320,7 +3316,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = pd.DataFrame([2, 4], index=[1, 2]) + df = DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -3332,7 +3328,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = pd.DataFrame( + df = DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -3355,7 +3351,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -3370,7 +3366,7 @@ def test_subplots_sharex_false(self): def test_plot_no_rows(self): # GH 27758 - df = pd.DataFrame(columns=["foo"], dtype=int) + df = DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -3379,13 +3375,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df = pd.DataFrame(["a", "b", "c"]) + df = DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -3395,7 +3391,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -3414,8 +3410,8 @@ def test_missing_markers_legend_using_style(self): def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): @@ -3436,7 +3432,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -3463,7 +3459,7 @@ def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): # GH 37001 xcol = "Type A" ycol = "Type B" - df = pd.DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) + df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) # default is the labels are column names ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) @@ -3485,7 +3481,7 @@ def test_xlabel_ylabel_dataframe_subplots( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 02231f0431d9f..cc86436ee8fa9 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -918,7 +918,7 @@ def test_all_any_boolean(self): def test_any_axis1_bool_only(self): # GH#32432 - df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) + df = DataFrame({"A": [True, False], "B": [1, 2]}) result = df.any(axis=1, bool_only=True) expected = Series([True, False]) tm.assert_series_equal(result, expected) @@ -1031,9 +1031,9 @@ def test_minmax_nat_series(self, nat_ser): @pytest.mark.parametrize( "nat_df", [ - pd.DataFrame([pd.NaT, pd.NaT]), - pd.DataFrame([pd.NaT, pd.Timedelta("nat")]), - pd.DataFrame([pd.Timedelta("nat"), pd.Timedelta("nat")]), + DataFrame([pd.NaT, pd.NaT]), + DataFrame([pd.NaT, pd.Timedelta("nat")]), + DataFrame([pd.Timedelta("nat"), pd.Timedelta("nat")]), ], ) def test_minmax_nat_dataframe(self, nat_df): diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 1b9145679fb12..7389fa31109f8 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -3,7 +3,6 @@ import numpy as np import pytest -import pandas as pd from pandas import DataFrame, Series import pandas._testing as tm from pandas.core.groupby.groupby import DataError @@ -158,7 +157,7 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti): index = _asfreq_compat(empty_frame_dti.index, freq) - expected = pd.DataFrame({"a": []}, dtype="int64", index=index) + expected = DataFrame({"a": []}, dtype="int64", index=index) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 07e47650d0c24..19e5a5dd7f5e7 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1442,14 +1442,14 @@ def test_groupby_with_dst_time_change(): [1478064900001000000, 1480037118776792000], tz="UTC" ).tz_convert("America/Chicago") - df = pd.DataFrame([1, 2], index=index) + df = DataFrame([1, 2], index=index) result = df.groupby(pd.Grouper(freq="1d")).last() expected_index_values = pd.date_range( "2016-11-02", "2016-11-24", freq="d", tz="America/Chicago" ) index = pd.DatetimeIndex(expected_index_values) - expected = pd.DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index) + expected = DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index) tm.assert_frame_equal(result, expected) @@ -1586,7 +1586,7 @@ def test_downsample_dst_at_midnight(): index = pd.date_range(start, end, freq="1H") index = index.tz_localize("UTC").tz_convert("America/Havana") data = list(range(len(index))) - dataframe = pd.DataFrame(data, index=index) + dataframe = DataFrame(data, index=index) result = dataframe.groupby(pd.Grouper(freq="1D")).mean() dti = date_range("2018-11-03", periods=3).tz_localize( @@ -1663,7 +1663,7 @@ def f(data, add_arg): tm.assert_series_equal(result, expected) # Testing dataframe - df = pd.DataFrame({"A": 1, "B": 2}, index=pd.date_range("2017", periods=10)) + df = DataFrame({"A": 1, "B": 2}, index=pd.date_range("2017", periods=10)) result = df.groupby("A").resample("D").agg(f, multiplier) expected = df.groupby("A").resample("D").mean().multiply(multiplier) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index 24695a38a85ac..6523c53cfd2a1 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -41,7 +41,7 @@ def test_deprecating_on_loffset_and_base(): # GH 31809 idx = pd.date_range("2001-01-01", periods=4, freq="T") - df = pd.DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"]) + df = DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"]) with tm.assert_produces_warning(FutureWarning): pd.Grouper(freq="10s", base=0) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index f5b655ebd416b..8bdaad285e3f6 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -566,7 +566,7 @@ def test_resample_with_dst_time_change(self): .tz_localize("UTC") .tz_convert("America/Chicago") ) - df = pd.DataFrame([1, 2], index=index) + df = DataFrame([1, 2], index=index) result = df.resample("12h", closed="right", label="right").last().ffill() expected_index_values = [ @@ -588,7 +588,7 @@ def test_resample_with_dst_time_change(self): "America/Chicago" ) index = pd.DatetimeIndex(index, freq="12h") - expected = pd.DataFrame( + expected = DataFrame( [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], index=index, ) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index dbb85c2f890bf..29f2aea1648ec 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -571,7 +571,7 @@ def test_agg_with_datetime_index_list_agg_func(col_name): # date parser. Some would result in OutOfBoundsError (ValueError) while # others would result in OverflowError when passed into Timestamp. # We catch these errors and move on to the correct branch. - df = pd.DataFrame( + df = DataFrame( list(range(200)), index=pd.date_range( start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin" @@ -579,7 +579,7 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - expected = pd.DataFrame( + expected = DataFrame( [47.5, 143.5, 195.5], index=pd.date_range( start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 53966392d3aff..ca31ef684257d 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -126,7 +126,7 @@ def test_getitem_multiple(): def test_groupby_resample_on_api_with_getitem(): # GH 17813 - df = pd.DataFrame( + df = DataFrame( {"id": list("aabbb"), "date": pd.date_range("1-1-2016", periods=5), "data": 1} ) exp = df.set_index("date").groupby("id").resample("2D")["data"].sum() @@ -351,7 +351,7 @@ def test_median_duplicate_columns(): def test_apply_to_one_column_of_df(): # GH: 36951 - df = pd.DataFrame( + df = DataFrame( {"col": range(10), "col1": range(10, 20)}, index=pd.date_range("2012-01-01", periods=10, freq="20min"), ) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index c8c5fa47706fc..0832724110203 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -293,7 +293,7 @@ def test_groupby_resample_interpolate(): # GH 35325 d = {"price": [10, 11, 9], "volume": [50, 60, 50]} - df = pd.DataFrame(d) + df = DataFrame(d) df["week_starting"] = pd.date_range("01/01/2018", periods=3, freq="W") @@ -324,7 +324,7 @@ def test_groupby_resample_interpolate(): ], names=["volume", "week_starting"], ) - expected = pd.DataFrame( + expected = DataFrame( data={ "price": [ 10.0, diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index d0a0cf3cacd16..4783d806f8023 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -154,13 +154,13 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): def test_resample_with_timedelta_yields_no_empty_groups(): # GH 10603 - df = pd.DataFrame( + df = DataFrame( np.random.normal(size=(10000, 4)), index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"), ) result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x)) - expected = pd.DataFrame( + expected = DataFrame( [[768.0] * 4] * 12 + [[528.0] * 4], index=pd.timedelta_range(start="1s", periods=13, freq="3s"), ) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 4cc72e66353b3..8108cd14b872a 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -743,7 +743,7 @@ def test_join_multi_to_multi(self, join_type): def test_join_on_tz_aware_datetimeindex(self): # GH 23931, 26335 - df1 = pd.DataFrame( + df1 = DataFrame( { "date": pd.date_range( start="2018-01-01", periods=5, tz="America/Chicago" @@ -752,7 +752,7 @@ def test_join_on_tz_aware_datetimeindex(self): } ) - df2 = pd.DataFrame( + df2 = DataFrame( { "date": pd.date_range( start="2018-01-03", periods=5, tz="America/Chicago" diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 6968dc781b6e3..7d701d26185f1 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -122,10 +122,10 @@ def setup_method(self, method): def test_merge_inner_join_empty(self): # GH 15328 - df_empty = pd.DataFrame() - df_a = pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") + df_empty = DataFrame() + df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") result = pd.merge(df_empty, df_a, left_index=True, right_index=True) - expected = pd.DataFrame({"a": []}, index=[], dtype="int64") + expected = DataFrame({"a": []}, index=[], dtype="int64") tm.assert_frame_equal(result, expected) def test_merge_common(self): @@ -136,7 +136,7 @@ def test_merge_common(self): def test_merge_non_string_columns(self): # https://github.com/pandas-dev/pandas/issues/17962 # Checks that method runs for non string column names - left = pd.DataFrame( + left = DataFrame( {0: [1, 0, 1, 0], 1: [0, 1, 0, 0], 2: [0, 0, 2, 0], 3: [1, 0, 0, 3]} ) @@ -430,10 +430,10 @@ def test_left_merge_empty_dataframe(self): ) def test_merge_left_empty_right_empty(self, join_type, kwarg): # GH 10824 - left = pd.DataFrame(columns=["a", "b", "c"]) - right = pd.DataFrame(columns=["x", "y", "z"]) + left = DataFrame(columns=["a", "b", "c"]) + right = DataFrame(columns=["x", "y", "z"]) - exp_in = pd.DataFrame( + exp_in = DataFrame( columns=["a", "b", "c", "x", "y", "z"], index=pd.Index([], dtype=object), dtype=object, @@ -444,10 +444,10 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg): def test_merge_left_empty_right_notempty(self): # GH 10824 - left = pd.DataFrame(columns=["a", "b", "c"]) - right = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["x", "y", "z"]) + left = DataFrame(columns=["a", "b", "c"]) + right = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["x", "y", "z"]) - exp_out = pd.DataFrame( + exp_out = DataFrame( { "a": np.array([np.nan] * 3, dtype=object), "b": np.array([np.nan] * 3, dtype=object), @@ -493,10 +493,10 @@ def check2(exp, kwarg): def test_merge_left_notempty_right_empty(self): # GH 10824 - left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) - right = pd.DataFrame(columns=["x", "y", "z"]) + left = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) + right = DataFrame(columns=["x", "y", "z"]) - exp_out = pd.DataFrame( + exp_out = DataFrame( { "a": [1, 4, 7], "b": [2, 5, 8], @@ -534,12 +534,12 @@ def check2(exp, kwarg): def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): # GH 25183 - df = pd.DataFrame( + df = DataFrame( {"key": series_of_dtype, "value": series_of_dtype2}, columns=["key", "value"], ) df_empty = df[:0] - expected = pd.DataFrame( + expected = DataFrame( { "value_x": Series(dtype=df.dtypes["value"]), "key": Series(dtype=df.dtypes["key"]), @@ -552,15 +552,15 @@ def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): # GH 25183 - df_left = pd.DataFrame( + df_left = DataFrame( {"key": series_of_dtype, "value": series_of_dtype_all_na}, columns=["key", "value"], ) - df_right = pd.DataFrame( + df_right = DataFrame( {"key": series_of_dtype, "value": series_of_dtype_all_na}, columns=["key", "value"], ) - expected = pd.DataFrame( + expected = DataFrame( { "key": series_of_dtype, "value_x": series_of_dtype_all_na, @@ -675,7 +675,7 @@ def test_join_append_timedeltas(self): def test_other_datetime_unit(self): # GH 13389 - df1 = pd.DataFrame({"entity_id": [101, 102]}) + df1 = DataFrame({"entity_id": [101, 102]}) s = Series([None, None], index=[101, 102], name="days") for dtype in [ @@ -694,7 +694,7 @@ def test_other_datetime_unit(self): result = df1.merge(df2, left_on="entity_id", right_index=True) - exp = pd.DataFrame( + exp = DataFrame( { "entity_id": [101, 102], "days": np.array(["nat", "nat"], dtype="datetime64[ns]"), @@ -706,7 +706,7 @@ def test_other_datetime_unit(self): @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) def test_other_timedelta_unit(self, unit): # GH 13389 - df1 = pd.DataFrame({"entity_id": [101, 102]}) + df1 = DataFrame({"entity_id": [101, 102]}) s = Series([None, None], index=[101, 102], name="days") dtype = f"m8[{unit}]" @@ -715,7 +715,7 @@ def test_other_timedelta_unit(self, unit): result = df1.merge(df2, left_on="entity_id", right_index=True) - exp = pd.DataFrame( + exp = DataFrame( {"entity_id": [101, 102], "days": np.array(["nat", "nat"], dtype=dtype)}, columns=["entity_id", "days"], ) @@ -748,13 +748,13 @@ def test_overlapping_columns_error_message(self): def test_merge_on_datetime64tz(self): # GH11405 - left = pd.DataFrame( + left = DataFrame( { "key": pd.date_range("20151010", periods=2, tz="US/Eastern"), "value": [1, 2], } ) - right = pd.DataFrame( + right = DataFrame( { "key": pd.date_range("20151011", periods=3, tz="US/Eastern"), "value": [1, 2, 3], @@ -771,13 +771,13 @@ def test_merge_on_datetime64tz(self): result = pd.merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) - left = pd.DataFrame( + left = DataFrame( { "key": [1, 2], "value": pd.date_range("20151010", periods=2, tz="US/Eastern"), } ) - right = pd.DataFrame( + right = DataFrame( { "key": [2, 3], "value": pd.date_range("20151011", periods=2, tz="US/Eastern"), @@ -800,7 +800,7 @@ def test_merge_on_datetime64tz(self): def test_merge_on_datetime64tz_empty(self): # https://github.com/pandas-dev/pandas/issues/25014 dtz = pd.DatetimeTZDtype(tz="UTC") - right = pd.DataFrame( + right = DataFrame( { "date": [pd.Timestamp("2018", tz=dtz.tz)], "value": [4.0], @@ -810,7 +810,7 @@ def test_merge_on_datetime64tz_empty(self): ) left = right[:0] result = left.merge(right, on="date") - expected = pd.DataFrame( + expected = DataFrame( { "value_x": Series(dtype=float), "date2_x": Series(dtype=dtz), @@ -824,12 +824,12 @@ def test_merge_on_datetime64tz_empty(self): def test_merge_datetime64tz_with_dst_transition(self): # GH 18885 - df1 = pd.DataFrame( + df1 = DataFrame( pd.date_range("2017-10-29 01:00", periods=4, freq="H", tz="Europe/Madrid"), columns=["date"], ) df1["value"] = 1 - df2 = pd.DataFrame( + df2 = DataFrame( { "date": pd.to_datetime( [ @@ -843,7 +843,7 @@ def test_merge_datetime64tz_with_dst_transition(self): ) df2["date"] = df2["date"].dt.tz_localize("UTC").dt.tz_convert("Europe/Madrid") result = pd.merge(df1, df2, how="outer", on="date") - expected = pd.DataFrame( + expected = DataFrame( { "date": pd.date_range( "2017-10-29 01:00", periods=7, freq="H", tz="Europe/Madrid" @@ -868,10 +868,10 @@ def test_merge_non_unique_period_index(self): tm.assert_frame_equal(result, expected) def test_merge_on_periods(self): - left = pd.DataFrame( + left = DataFrame( {"key": pd.period_range("20151010", periods=2, freq="D"), "value": [1, 2]} ) - right = pd.DataFrame( + right = DataFrame( { "key": pd.period_range("20151011", periods=3, freq="D"), "value": [1, 2, 3], @@ -888,10 +888,10 @@ def test_merge_on_periods(self): result = pd.merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) - left = pd.DataFrame( + left = DataFrame( {"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")} ) - right = pd.DataFrame( + right = DataFrame( {"key": [2, 3], "value": pd.period_range("20151011", periods=2, freq="D")} ) @@ -1132,7 +1132,7 @@ def test_validation(self): tm.assert_frame_equal(result, expected_3) # Dups on right - right_w_dups = right.append(pd.DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])) + right_w_dups = right.append(DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])) merge( left, right_w_dups, @@ -1156,7 +1156,7 @@ def test_validation(self): # Dups on left left_w_dups = left.append( - pd.DataFrame({"a": ["a"], "c": ["cow"]}, index=[3]), sort=True + DataFrame({"a": ["a"], "c": ["cow"]}, index=[3]), sort=True ) merge( left_w_dups, @@ -1242,7 +1242,7 @@ def test_validation(self): def test_merge_two_empty_df_no_division_error(self): # GH17776, PR #17846 - a = pd.DataFrame({"a": [], "b": [], "c": []}) + a = DataFrame({"a": [], "b": [], "c": []}) with np.errstate(divide="raise"): merge(a, a, on=("a", "b")) @@ -1285,10 +1285,10 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index): # GH 24212 # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that # -1 is interpreted as a missing value instead of the last element - df1 = pd.DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index) - df2 = pd.DataFrame({"b": [0, 1, 2, 3, 4, 5]}) + df1 = DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index) + df2 = DataFrame({"b": [0, 1, 2, 3, 4, 5]}) result = df1.merge(df2, left_on="key", right_index=True, how=how) - expected = pd.DataFrame( + expected = DataFrame( [ [0, 0, 0], [1, 1, 1], @@ -1306,10 +1306,10 @@ def test_merge_right_index_right(self): # Note: the expected output here is probably incorrect. # See https://github.com/pandas-dev/pandas/issues/17257 for more. # We include this as a regression test for GH-24897. - left = pd.DataFrame({"a": [1, 2, 3], "key": [0, 1, 1]}) - right = pd.DataFrame({"b": [1, 2, 3]}) + left = DataFrame({"a": [1, 2, 3], "key": [0, 1, 1]}) + right = DataFrame({"b": [1, 2, 3]}) - expected = pd.DataFrame( + expected = DataFrame( {"a": [1, 2, 3, None], "key": [0, 1, 1, 2], "b": [1, 2, 2, 3]}, columns=["a", "key", "b"], index=[0, 1, 2, np.nan], @@ -1320,30 +1320,26 @@ def test_merge_right_index_right(self): @pytest.mark.parametrize("how", ["left", "right"]) def test_merge_preserves_row_order(self, how): # GH 27453 - left_df = pd.DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]}) - right_df = pd.DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]}) + left_df = DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]}) + right_df = DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]}) result = left_df.merge(right_df, on=["animal", "max_speed"], how=how) if how == "right": - expected = pd.DataFrame( - {"animal": ["quetzal", "pig"], "max_speed": [80, 11]} - ) + expected = DataFrame({"animal": ["quetzal", "pig"], "max_speed": [80, 11]}) else: - expected = pd.DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]}) + expected = DataFrame({"animal": ["dog", "pig"], "max_speed": [40, 11]}) tm.assert_frame_equal(result, expected) def test_merge_take_missing_values_from_index_of_other_dtype(self): # GH 24212 - left = pd.DataFrame( + left = DataFrame( { "a": [1, 2, 3], "key": pd.Categorical(["a", "a", "b"], categories=list("abc")), } ) - right = pd.DataFrame( - {"b": [1, 2, 3]}, index=pd.CategoricalIndex(["a", "b", "c"]) - ) + right = DataFrame({"b": [1, 2, 3]}, index=pd.CategoricalIndex(["a", "b", "c"])) result = left.merge(right, left_on="key", right_index=True, how="right") - expected = pd.DataFrame( + expected = DataFrame( { "a": [1, 2, 3, None], "key": pd.Categorical(["a", "a", "b", "c"]), @@ -1356,10 +1352,10 @@ def test_merge_take_missing_values_from_index_of_other_dtype(self): def test_merge_readonly(self): # https://github.com/pandas-dev/pandas/issues/27943 - data1 = pd.DataFrame( + data1 = DataFrame( np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"] ) - data2 = pd.DataFrame( + data2 = DataFrame( np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"] ) @@ -1743,7 +1739,7 @@ def test_self_join_multiple_categories(self): # GH 16767 # non-duplicates should work with multiple categories m = 5 - df = pd.DataFrame( + df = DataFrame( { "a": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] * m, "b": ["t", "w", "x", "y", "z"] * 2 * m, @@ -1783,17 +1779,17 @@ def test_dtype_on_categorical_dates(self): # GH 16900 # dates should not be coerced to ints - df = pd.DataFrame( + df = DataFrame( [[date(2001, 1, 1), 1.1], [date(2001, 1, 2), 1.3]], columns=["date", "num2"] ) df["date"] = df["date"].astype("category") - df2 = pd.DataFrame( + df2 = DataFrame( [[date(2001, 1, 1), 1.3], [date(2001, 1, 3), 1.4]], columns=["date", "num4"] ) df2["date"] = df2["date"].astype("category") - expected_outer = pd.DataFrame( + expected_outer = DataFrame( [ [pd.Timestamp("2001-01-01"), 1.1, 1.3], [pd.Timestamp("2001-01-02"), 1.3, np.nan], @@ -1804,7 +1800,7 @@ def test_dtype_on_categorical_dates(self): result_outer = pd.merge(df, df2, how="outer", on=["date"]) tm.assert_frame_equal(result_outer, expected_outer) - expected_inner = pd.DataFrame( + expected_inner = DataFrame( [[pd.Timestamp("2001-01-01"), 1.1, 1.3]], columns=["date", "num2", "num4"] ) result_inner = pd.merge(df, df2, how="inner", on=["date"]) @@ -1824,21 +1820,19 @@ def test_merging_with_bool_or_int_cateorical_column( ): # GH 17187 # merging with a boolean/int categorical column - df1 = pd.DataFrame({"id": [1, 2, 3, 4], "cat": category_column}) + df1 = DataFrame({"id": [1, 2, 3, 4], "cat": category_column}) df1["cat"] = df1["cat"].astype(CDT(categories, ordered=ordered)) - df2 = pd.DataFrame({"id": [2, 4], "num": [1, 9]}) + df2 = DataFrame({"id": [2, 4], "num": [1, 9]}) result = df1.merge(df2) - expected = pd.DataFrame( - {"id": [2, 4], "cat": expected_categories, "num": [1, 9]} - ) + expected = DataFrame({"id": [2, 4], "cat": expected_categories, "num": [1, 9]}) expected["cat"] = expected["cat"].astype(CDT(categories, ordered=ordered)) tm.assert_frame_equal(expected, result) def test_merge_on_int_array(self): # GH 23020 - df = pd.DataFrame({"A": Series([1, 2, np.nan], dtype="Int64"), "B": 1}) + df = DataFrame({"A": Series([1, 2, np.nan], dtype="Int64"), "B": 1}) result = pd.merge(df, df, on="A") - expected = pd.DataFrame( + expected = DataFrame( {"A": Series([1, 2, np.nan], dtype="Int64"), "B_x": 1, "B_y": 1} ) tm.assert_frame_equal(result, expected) @@ -1950,7 +1944,7 @@ def test_merge_index_types(index): ) def test_merge_series(on, left_on, right_on, left_index, right_index, nm): # GH 21220 - a = pd.DataFrame( + a = DataFrame( {"A": [1, 2, 3, 4]}, index=pd.MultiIndex.from_product( [["a", "b"], [0, 1]], names=["outer", "inner"] @@ -1963,7 +1957,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): ), name=nm, ) - expected = pd.DataFrame( + expected = DataFrame( {"A": [2, 4], "B": [1, 3]}, index=pd.MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]), ) @@ -2012,10 +2006,10 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): ) def test_merge_suffix(col1, col2, kwargs, expected_cols): # issue: 24782 - a = pd.DataFrame({col1: [1, 2, 3]}) - b = pd.DataFrame({col2: [4, 5, 6]}) + a = DataFrame({col1: [1, 2, 3]}) + b = DataFrame({col2: [4, 5, 6]}) - expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]], columns=expected_cols) + expected = DataFrame([[1, 4], [2, 5], [3, 6]], columns=expected_cols) result = a.merge(b, left_index=True, right_index=True, **kwargs) tm.assert_frame_equal(result, expected) @@ -2060,8 +2054,8 @@ def test_merge_duplicate_suffix(how, expected): ) def test_merge_suffix_error(col1, col2, suffixes): # issue: 24782 - a = pd.DataFrame({col1: [1, 2, 3]}) - b = pd.DataFrame({col2: [3, 4, 5]}) + a = DataFrame({col1: [1, 2, 3]}) + b = DataFrame({col2: [3, 4, 5]}) # TODO: might reconsider current raise behaviour, see issue 24782 msg = "columns overlap but no suffix specified" @@ -2071,8 +2065,8 @@ def test_merge_suffix_error(col1, col2, suffixes): @pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}]) def test_merge_suffix_warns(suffixes): - a = pd.DataFrame({"a": [1, 2, 3]}) - b = pd.DataFrame({"b": [3, 4, 5]}) + a = DataFrame({"a": [1, 2, 3]}) + b = DataFrame({"b": [3, 4, 5]}) with tm.assert_produces_warning(FutureWarning): pd.merge(a, b, left_index=True, right_index=True, suffixes={"left", "right"}) @@ -2086,8 +2080,8 @@ def test_merge_suffix_warns(suffixes): ], ) def test_merge_suffix_length_error(col1, col2, suffixes, msg): - a = pd.DataFrame({col1: [1, 2, 3]}) - b = pd.DataFrame({col2: [3, 4, 5]}) + a = DataFrame({col1: [1, 2, 3]}) + b = DataFrame({col2: [3, 4, 5]}) with pytest.raises(ValueError, match=msg): pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) @@ -2176,9 +2170,9 @@ def test_merge_multiindex_columns(): numbers = ["1", "2", "3"] index = pd.MultiIndex.from_product((letters, numbers), names=["outer", "inner"]) - frame_x = pd.DataFrame(columns=index) + frame_x = DataFrame(columns=index) frame_x["id"] = "" - frame_y = pd.DataFrame(columns=index) + frame_y = DataFrame(columns=index) frame_y["id"] = "" l_suf = "_x" @@ -2190,7 +2184,7 @@ def test_merge_multiindex_columns(): expected_index = pd.MultiIndex.from_product( [expected_labels, numbers], names=["outer", "inner"] ) - expected = pd.DataFrame(columns=expected_index) + expected = DataFrame(columns=expected_index) expected["id"] = "" tm.assert_frame_equal(result, expected) @@ -2198,12 +2192,12 @@ def test_merge_multiindex_columns(): def test_merge_datetime_upcast_dtype(): # https://github.com/pandas-dev/pandas/issues/31208 - df1 = pd.DataFrame({"x": ["a", "b", "c"], "y": ["1", "2", "4"]}) - df2 = pd.DataFrame( + df1 = DataFrame({"x": ["a", "b", "c"], "y": ["1", "2", "4"]}) + df2 = DataFrame( {"y": ["1", "2", "3"], "z": pd.to_datetime(["2000", "2001", "2002"])} ) result = pd.merge(df1, df2, how="left", on="y") - expected = pd.DataFrame( + expected = DataFrame( { "x": ["a", "b", "c"], "y": ["1", "2", "4"], diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index e0063925a03e1..17f2f44f45fce 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -88,9 +88,9 @@ def test_empty_sequence_concat(self): with pytest.raises(ValueError, match=pattern): pd.concat(df_seq) - pd.concat([pd.DataFrame()]) - pd.concat([None, pd.DataFrame()]) - pd.concat([pd.DataFrame(), None]) + pd.concat([DataFrame()]) + pd.concat([None, DataFrame()]) + pd.concat([DataFrame(), None]) def test_doc_example(self): left = DataFrame( diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 61fdafa0c6db2..68096192c51ea 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -200,11 +200,9 @@ def test_merge_multiple_cols_with_mixed_cols_index(self): pd.MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]), name="Amount", ) - df = pd.DataFrame( - {"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0} - ) + df = DataFrame({"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0}) result = pd.merge(df, s.reset_index(), on=["lev1", "lev2"]) - expected = pd.DataFrame( + expected = DataFrame( { "lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], @@ -801,7 +799,7 @@ def test_single_common_level(self): [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"] ) - left = pd.DataFrame( + left = DataFrame( {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=index_left ) @@ -809,7 +807,7 @@ def test_single_common_level(self): [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"] ) - right = pd.DataFrame( + right = DataFrame( {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]}, index=index_right, ) @@ -828,12 +826,12 @@ def test_join_multi_wrong_order(self): midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) midx3 = pd.MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"]) - left = pd.DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) - right = pd.DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) + left = DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) + right = DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) result = left.join(right) - expected = pd.DataFrame( + expected = DataFrame( index=midx1, data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]}, ) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 340b50ed60ceb..a5b862adc8768 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -344,14 +344,14 @@ def test_concatlike_datetimetz_short(self, tz): # GH#7795 ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz) ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz) - df1 = pd.DataFrame(0, index=ix1, columns=["A", "B"]) - df2 = pd.DataFrame(0, index=ix2, columns=["A", "B"]) + df1 = DataFrame(0, index=ix1, columns=["A", "B"]) + df2 = DataFrame(0, index=ix2, columns=["A", "B"]) exp_idx = pd.DatetimeIndex( ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"], tz=tz, ) - exp = pd.DataFrame(0, index=exp_idx, columns=["A", "B"]) + exp = DataFrame(0, index=exp_idx, columns=["A", "B"]) tm.assert_frame_equal(df1.append(df2), exp) tm.assert_frame_equal(pd.concat([df1, df2]), exp) @@ -849,14 +849,14 @@ def test_append_records(self): # rewrite sort fixture, since we also want to test default of None def test_append_sorts(self, sort): - df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) - df2 = pd.DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) + df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) + df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) with tm.assert_produces_warning(None): result = df1.append(df2, sort=sort) # for None / True - expected = pd.DataFrame( + expected = DataFrame( {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]}, columns=["a", "b", "c"], ) @@ -937,11 +937,11 @@ def test_append_same_columns_type(self, index): # GH18359 # df wider than ser - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) ser_index = index[:2] ser = Series([7, 8], index=ser_index, name=2) result = df.append(ser) - expected = pd.DataFrame( + expected = DataFrame( [[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index ) tm.assert_frame_equal(result, expected) @@ -949,10 +949,10 @@ def test_append_same_columns_type(self, index): # ser wider than df ser_index = index index = index[:2] - df = pd.DataFrame([[1, 2], [4, 5]], columns=index) + df = DataFrame([[1, 2], [4, 5]], columns=index) ser = Series([7, 8, 9], index=ser_index, name=2) result = df.append(ser) - expected = pd.DataFrame( + expected = DataFrame( [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], index=[0, 1, 2], columns=ser_index, @@ -969,13 +969,13 @@ def test_append_different_columns_types(self, df_columns, series_index): # See also test 'test_append_different_columns_types_raises' below # for errors raised when appending - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) ser = Series([7, 8, 9], index=series_index, name=2) result = df.append(ser) idx_diff = ser.index.difference(df_columns) combined_columns = Index(df_columns.tolist()).append(idx_diff) - expected = pd.DataFrame( + expected = DataFrame( [ [1.0, 2.0, 3.0, np.nan, np.nan, np.nan], [4, 5, 6, np.nan, np.nan, np.nan], @@ -1004,7 +1004,7 @@ def test_append_different_columns_types_raises( # See also test 'test_append_different_columns_types' above for # appending without raising. - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) ser = Series([7, 8, 9], index=index_cannot_append_with_other, name=2) msg = ( r"Expected tuple, got (int|long|float|str|" @@ -1015,9 +1015,7 @@ def test_append_different_columns_types_raises( with pytest.raises(TypeError, match=msg): df.append(ser) - df = pd.DataFrame( - [[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other - ) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other) ser = Series([7, 8, 9], index=index_can_append, name=2) with pytest.raises(TypeError, match=msg): @@ -1112,19 +1110,19 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): def test_append_empty_tz_frame_with_datetime64ns(self): # https://github.com/pandas-dev/pandas/issues/35460 - df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") # pd.NaT gets inferred as tz-naive, so append result is tz-naive result = df.append({"a": pd.NaT}, ignore_index=True) - expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") tm.assert_frame_equal(result, expected) # also test with typed value to append - df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") result = df.append( Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True ) - expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") tm.assert_frame_equal(result, expected) @@ -1316,13 +1314,12 @@ def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out): pd.Index(["c", "d", "e"], name=name_in3), ] frames = [ - pd.DataFrame({c: [0, 1, 2]}, index=i) - for i, c in zip(indices, ["x", "y", "z"]) + DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"]) ] result = pd.concat(frames, axis=1) exp_ind = pd.Index(["a", "b", "c", "d", "e"], name=name_out) - expected = pd.DataFrame( + expected = DataFrame( { "x": [0, 1, 2, np.nan, np.nan], "y": [np.nan, 0, 1, 2, np.nan], @@ -1383,15 +1380,13 @@ def test_concat_multiindex_with_tz(self): def test_concat_multiindex_with_none_in_index_names(self): # GH 15787 index = pd.MultiIndex.from_product([[1], range(5)], names=["level1", None]) - df = pd.DataFrame({"col": range(5)}, index=index, dtype=np.int32) + df = DataFrame({"col": range(5)}, index=index, dtype=np.int32) result = concat([df, df], keys=[1, 2], names=["level2"]) index = pd.MultiIndex.from_product( [[1, 2], [1], range(5)], names=["level2", "level1", None] ) - expected = pd.DataFrame( - {"col": list(range(5)) * 2}, index=index, dtype=np.int32 - ) + expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32) tm.assert_frame_equal(result, expected) result = concat([df, df[:2]], keys=[1, 2], names=["level2"]) @@ -1400,7 +1395,7 @@ def test_concat_multiindex_with_none_in_index_names(self): no_name = list(range(5)) + list(range(2)) tuples = list(zip(level2, level1, no_name)) index = pd.MultiIndex.from_tuples(tuples, names=["level2", "level1", None]) - expected = pd.DataFrame({"col": no_name}, index=index, dtype=np.int32) + expected = DataFrame({"col": no_name}, index=index, dtype=np.int32) tm.assert_frame_equal(result, expected) def test_concat_keys_and_levels(self): @@ -1876,9 +1871,9 @@ def test_concat_bug_3602(self): def test_concat_inner_join_empty(self): # GH 15328 - df_empty = pd.DataFrame() - df_a = pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") - df_expected = pd.DataFrame({"a": []}, index=[], dtype="int64") + df_empty = DataFrame() + df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") + df_expected = DataFrame({"a": []}, index=[], dtype="int64") for how, expected in [("inner", df_expected), ("outer", df_a)]: result = pd.concat([df_a, df_empty], axis=1, join=how) @@ -2029,40 +2024,40 @@ def test_concat_tz_series(self): # see gh-12217 and gh-12306 # Concatenating two UTC times - first = pd.DataFrame([[datetime(2016, 1, 1)]]) + first = DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize("UTC") - second = pd.DataFrame([[datetime(2016, 1, 2)]]) + second = DataFrame([[datetime(2016, 1, 2)]]) second[0] = second[0].dt.tz_localize("UTC") result = pd.concat([first, second]) assert result[0].dtype == "datetime64[ns, UTC]" # Concatenating two London times - first = pd.DataFrame([[datetime(2016, 1, 1)]]) + first = DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize("Europe/London") - second = pd.DataFrame([[datetime(2016, 1, 2)]]) + second = DataFrame([[datetime(2016, 1, 2)]]) second[0] = second[0].dt.tz_localize("Europe/London") result = pd.concat([first, second]) assert result[0].dtype == "datetime64[ns, Europe/London]" # Concatenating 2+1 London times - first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) + first = DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) first[0] = first[0].dt.tz_localize("Europe/London") - second = pd.DataFrame([[datetime(2016, 1, 3)]]) + second = DataFrame([[datetime(2016, 1, 3)]]) second[0] = second[0].dt.tz_localize("Europe/London") result = pd.concat([first, second]) assert result[0].dtype == "datetime64[ns, Europe/London]" # Concat'ing 1+2 London times - first = pd.DataFrame([[datetime(2016, 1, 1)]]) + first = DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize("Europe/London") - second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]]) + second = DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]]) second[0] = second[0].dt.tz_localize("Europe/London") result = pd.concat([first, second]) @@ -2105,13 +2100,11 @@ def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s): # GH 12396 # tz-naive - first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply( - lambda x: x.dt.tz_localize(tz1) - ) - second = pd.DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2)) + first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1)) + second = DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2)) result = pd.concat([first, second], axis=0) - expected = pd.DataFrame(Series([pd.NaT, pd.NaT, s], index=[0, 1, 0])) + expected = DataFrame(Series([pd.NaT, pd.NaT, s], index=[0, 1, 0])) expected = expected.apply(lambda x: x.dt.tz_localize(tz2)) if tz1 != tz2: expected = expected.astype(object) @@ -2123,9 +2116,9 @@ def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s): def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2): # GH 12396 - first = pd.DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)) - second = pd.DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1]) - expected = pd.DataFrame( + first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)) + second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1]) + expected = DataFrame( { 0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1), 1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2), @@ -2141,7 +2134,7 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): # tz-naive first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1) - second = pd.DataFrame( + second = DataFrame( [ [pd.Timestamp("2015/01/01", tz=tz2)], [pd.Timestamp("2016/01/01", tz=tz2)], @@ -2149,7 +2142,7 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): index=[2, 3], ) - expected = pd.DataFrame( + expected = DataFrame( [ pd.NaT, pd.NaT, @@ -2167,13 +2160,13 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): def test_concat_NaT_dataframes(self, tz): # GH 12396 - first = pd.DataFrame([[pd.NaT], [pd.NaT]]) + first = DataFrame([[pd.NaT], [pd.NaT]]) first = first.apply(lambda x: x.dt.tz_localize(tz)) - second = pd.DataFrame( + second = DataFrame( [[pd.Timestamp("2015/01/01", tz=tz)], [pd.Timestamp("2016/01/01", tz=tz)]], index=[2, 3], ) - expected = pd.DataFrame( + expected = DataFrame( [ pd.NaT, pd.NaT, @@ -2228,7 +2221,7 @@ def test_concat_empty_series(self): s1 = Series([1, 2, 3], name="x") s2 = Series(name="y", dtype="float64") res = pd.concat([s1, s2], axis=1) - exp = pd.DataFrame( + exp = DataFrame( {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, index=pd.Index([0, 1, 2], dtype="O"), ) @@ -2245,7 +2238,7 @@ def test_concat_empty_series(self): s1 = Series([1, 2, 3], name="x") s2 = Series(name=None, dtype="float64") res = pd.concat([s1, s2], axis=1) - exp = pd.DataFrame( + exp = DataFrame( {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, columns=["x", 0], index=pd.Index([0, 1, 2], dtype="O"), @@ -2276,7 +2269,7 @@ def test_default_index(self): s2 = Series([4, 5, 6], name="y") res = pd.concat([s1, s2], axis=1, ignore_index=True) assert isinstance(res.columns, pd.RangeIndex) - exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) + exp = DataFrame([[1, 4], [2, 5], [3, 6]]) # use check_index_type=True to check the result have # RangeIndex (default index) tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) @@ -2286,20 +2279,20 @@ def test_default_index(self): s2 = Series([4, 5, 6]) res = pd.concat([s1, s2], axis=1, ignore_index=False) assert isinstance(res.columns, pd.RangeIndex) - exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) + exp = DataFrame([[1, 4], [2, 5], [3, 6]]) exp.columns = pd.RangeIndex(2) tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) # is_dataframe and ignore_index - df1 = pd.DataFrame({"A": [1, 2], "B": [5, 6]}) - df2 = pd.DataFrame({"A": [3, 4], "B": [7, 8]}) + df1 = DataFrame({"A": [1, 2], "B": [5, 6]}) + df2 = DataFrame({"A": [3, 4], "B": [7, 8]}) res = pd.concat([df1, df2], axis=0, ignore_index=True) - exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"]) + exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"]) tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) res = pd.concat([df1, df2], axis=1, ignore_index=True) - exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) + exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) def test_concat_multiindex_rangeindex(self): @@ -2322,10 +2315,10 @@ def test_concat_multiindex_dfs_with_deepcopy(self): from copy import deepcopy example_multiindex1 = pd.MultiIndex.from_product([["a"], ["b"]]) - example_dataframe1 = pd.DataFrame([0], index=example_multiindex1) + example_dataframe1 = DataFrame([0], index=example_multiindex1) example_multiindex2 = pd.MultiIndex.from_product([["a"], ["c"]]) - example_dataframe2 = pd.DataFrame([1], index=example_multiindex2) + example_dataframe2 = DataFrame([1], index=example_multiindex2) example_dict = {"s1": example_dataframe1, "s2": example_dataframe2} expected_index = pd.MultiIndex( @@ -2333,7 +2326,7 @@ def test_concat_multiindex_dfs_with_deepcopy(self): codes=[[0, 1], [0, 0], [0, 1]], names=["testname", None, None], ) - expected = pd.DataFrame([[0], [1]], index=expected_index) + expected = DataFrame([[0], [1]], index=expected_index) result_copy = pd.concat(deepcopy(example_dict), names=["testname"]) tm.assert_frame_equal(result_copy, expected) result_no_copy = pd.concat(example_dict, names=["testname"]) @@ -2506,7 +2499,7 @@ def test_concat_categoricalindex(self): result = pd.concat([a, b, c], axis=1) exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) - exp = pd.DataFrame( + exp = DataFrame( { 0: [1, 1, np.nan, np.nan], 1: [np.nan, 2, 2, np.nan], @@ -2519,10 +2512,8 @@ def test_concat_categoricalindex(self): def test_concat_order(self): # GH 17344 - dfs = [pd.DataFrame(index=range(3), columns=["a", 1, None])] - dfs += [ - pd.DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100) - ] + dfs = [DataFrame(index=range(3), columns=["a", 1, None])] + dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100)] result = pd.concat(dfs, sort=True).columns expected = dfs[0].columns @@ -2532,8 +2523,8 @@ def test_concat_datetime_timezone(self): # GH 18523 idx1 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris") idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq="H") - df1 = pd.DataFrame({"a": [1, 2, 3]}, index=idx1) - df2 = pd.DataFrame({"b": [1, 2, 3]}, index=idx2) + df1 = DataFrame({"a": [1, 2, 3]}, index=idx1) + df2 = DataFrame({"b": [1, 2, 3]}, index=idx2) result = pd.concat([df1, df2], axis=1) exp_idx = ( @@ -2549,14 +2540,14 @@ def test_concat_datetime_timezone(self): .tz_convert("Europe/Paris") ) - expected = pd.DataFrame( + expected = DataFrame( [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"] ) tm.assert_frame_equal(result, expected) idx3 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo") - df3 = pd.DataFrame({"b": [1, 2, 3]}, index=idx3) + df3 = DataFrame({"b": [1, 2, 3]}, index=idx3) result = pd.concat([df1, df3], axis=1) exp_idx = DatetimeIndex( @@ -2570,7 +2561,7 @@ def test_concat_datetime_timezone(self): ] ) - expected = pd.DataFrame( + expected = DataFrame( [ [np.nan, 1], [np.nan, 2], @@ -2589,7 +2580,7 @@ def test_concat_datetime_timezone(self): result = pd.concat( [df1.resample("H").mean(), df2.resample("H").mean()], sort=True ) - expected = pd.DataFrame( + expected = DataFrame( {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]}, index=idx1.append(idx1), ) @@ -2645,9 +2636,9 @@ def test_concat_will_upcast(dt, pdt): def test_concat_empty_and_non_empty_frame_regression(): # GH 18178 regression test - df1 = pd.DataFrame({"foo": [1]}) - df2 = pd.DataFrame({"foo": []}) - expected = pd.DataFrame({"foo": [1.0]}) + df1 = DataFrame({"foo": [1]}) + df2 = DataFrame({"foo": []}) + expected = DataFrame({"foo": [1.0]}) result = pd.concat([df1, df2]) tm.assert_frame_equal(result, expected) @@ -2664,11 +2655,11 @@ def test_concat_empty_and_non_empty_series_regression(): def test_concat_sorts_columns(sort): # GH-4588 - df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) - df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]}) + df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) + df2 = DataFrame({"a": [3, 4], "c": [5, 6]}) # for sort=True/None - expected = pd.DataFrame( + expected = DataFrame( {"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]}, columns=["a", "b", "c"], ) @@ -2683,11 +2674,11 @@ def test_concat_sorts_columns(sort): def test_concat_sorts_index(sort): - df1 = pd.DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"]) - df2 = pd.DataFrame({"b": [1, 2]}, index=["a", "b"]) + df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"]) + df2 = DataFrame({"b": [1, 2]}, index=["a", "b"]) # For True/None - expected = pd.DataFrame( + expected = DataFrame( {"a": [2, 3, 1], "b": [1, 2, None]}, index=["a", "b", "c"], columns=["a", "b"] ) if sort is False: @@ -2701,15 +2692,15 @@ def test_concat_sorts_index(sort): def test_concat_inner_sort(sort): # https://github.com/pandas-dev/pandas/pull/20613 - df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]) - df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4]) + df1 = DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]) + df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4]) with tm.assert_produces_warning(None): # unset sort should *not* warn for inner join # since that never sorted result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True) - expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"]) + expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"]) if sort is True: expected = expected[["a", "b"]] tm.assert_frame_equal(result, expected) @@ -2717,9 +2708,9 @@ def test_concat_inner_sort(sort): def test_concat_aligned_sort(): # GH-4588 - df = pd.DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"]) + df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"]) result = pd.concat([df, df], sort=True, ignore_index=True) - expected = pd.DataFrame( + expected = DataFrame( {"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]}, columns=["a", "b", "c"], ) @@ -2733,8 +2724,8 @@ def test_concat_aligned_sort(): def test_concat_aligned_sort_does_not_raise(): # GH-4588 # We catch TypeErrors from sorting internally and do not re-raise. - df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"]) - expected = pd.DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"]) + df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"]) + expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"]) result = pd.concat([df, df], ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) @@ -2769,10 +2760,10 @@ def test_concat_categorical_unchanged(): # GH-12007 # test fix for when concat on categorical and float # coerces dtype categorical -> float - df = pd.DataFrame(Series(["a", "b", "c"], dtype="category", name="A")) + df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A")) ser = Series([0, 1, 2], index=[0, 1, 3], name="B") result = pd.concat([df, ser], axis=1) - expected = pd.DataFrame( + expected = DataFrame( { "A": Series(["a", "b", "c", np.nan], dtype="category"), "B": Series([0, 1, np.nan, 2], dtype="float"), @@ -2786,21 +2777,21 @@ def test_concat_datetimeindex_freq(): # Monotonic index result dr = pd.date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC") data = list(range(100)) - expected = pd.DataFrame(data, index=dr) + expected = DataFrame(data, index=dr) result = pd.concat([expected[:50], expected[50:]]) tm.assert_frame_equal(result, expected) # Non-monotonic index result result = pd.concat([expected[50:], expected[:50]]) - expected = pd.DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) + expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) expected.index._data.freq = None tm.assert_frame_equal(result, expected) def test_concat_empty_df_object_dtype(): # GH 9149 - df_1 = pd.DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) - df_2 = pd.DataFrame(columns=df_1.columns) + df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) + df_2 = DataFrame(columns=df_1.columns) result = pd.concat([df_1, df_2], axis=0) expected = df_1.astype(object) tm.assert_frame_equal(result, expected) @@ -2809,7 +2800,7 @@ def test_concat_empty_df_object_dtype(): def test_concat_sparse(): # GH 23557 a = Series(SparseArray([0, 1, 2])) - expected = pd.DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype( + expected = DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype( pd.SparseDtype(np.int64, 0) ) result = pd.concat([a, a], axis=1) @@ -2906,24 +2897,24 @@ def test_concat_preserves_subclass(obj): def test_concat_frame_axis0_extension_dtypes(): # preserve extension dtype (through common_dtype mechanism) - df1 = pd.DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")}) - df2 = pd.DataFrame({"a": np.array([4, 5, 6])}) + df1 = DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")}) + df2 = DataFrame({"a": np.array([4, 5, 6])}) result = pd.concat([df1, df2], ignore_index=True) - expected = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64") + expected = DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64") tm.assert_frame_equal(result, expected) result = pd.concat([df2, df1], ignore_index=True) - expected = pd.DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64") + expected = DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64") tm.assert_frame_equal(result, expected) def test_concat_preserves_extension_int64_dtype(): # GH 24768 - df_a = pd.DataFrame({"a": [-1]}, dtype="Int64") - df_b = pd.DataFrame({"b": [1]}, dtype="Int64") + df_a = DataFrame({"a": [-1]}, dtype="Int64") + df_b = DataFrame({"b": [1]}, dtype="Int64") result = pd.concat([df_a, df_b], ignore_index=True) - expected = pd.DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64") + expected = DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64") tm.assert_frame_equal(result, expected) @@ -3111,20 +3102,20 @@ def test_concat_tz_NaT(self, t1): def test_concat_tz_not_aligned(self): # GH#22796 ts = pd.to_datetime([1, 2]).tz_localize("UTC") - a = pd.DataFrame({"A": ts}) - b = pd.DataFrame({"A": ts, "B": ts}) + a = DataFrame({"A": ts}) + b = DataFrame({"A": ts, "B": ts}) result = pd.concat([a, b], sort=True, ignore_index=True) - expected = pd.DataFrame( + expected = DataFrame( {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)} ) tm.assert_frame_equal(result, expected) def test_concat_tuple_keys(self): # GH#14438 - df1 = pd.DataFrame(np.ones((2, 2)), columns=list("AB")) - df2 = pd.DataFrame(np.ones((3, 2)) * 2, columns=list("AB")) + df1 = DataFrame(np.ones((2, 2)), columns=list("AB")) + df2 = DataFrame(np.ones((3, 2)) * 2, columns=list("AB")) results = pd.concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")]) - expected = pd.DataFrame( + expected = DataFrame( { "A": { ("bee", "bah", 0): 1.0, @@ -3146,10 +3137,10 @@ def test_concat_tuple_keys(self): def test_concat_named_keys(self): # GH#14252 - df = pd.DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) + df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) index = Index(["a", "b"], name="baz") concatted_named_from_keys = pd.concat([df, df], keys=index) - expected_named = pd.DataFrame( + expected_named = DataFrame( {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]), ) @@ -3162,7 +3153,7 @@ def test_concat_named_keys(self): tm.assert_frame_equal(concatted_named_from_names, expected_named) concatted_unnamed = pd.concat([df, df], keys=index_no_name) - expected_unnamed = pd.DataFrame( + expected_unnamed = DataFrame( {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]), ) @@ -3170,11 +3161,11 @@ def test_concat_named_keys(self): def test_concat_axis_parameter(self): # GH#14369 - df1 = pd.DataFrame({"A": [0.1, 0.2]}, index=range(2)) - df2 = pd.DataFrame({"A": [0.3, 0.4]}, index=range(2)) + df1 = DataFrame({"A": [0.1, 0.2]}, index=range(2)) + df2 = DataFrame({"A": [0.3, 0.4]}, index=range(2)) # Index/row/0 DataFrame - expected_index = pd.DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) + expected_index = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) concatted_index = pd.concat([df1, df2], axis="index") tm.assert_frame_equal(concatted_index, expected_index) @@ -3186,7 +3177,7 @@ def test_concat_axis_parameter(self): tm.assert_frame_equal(concatted_0, expected_index) # Columns/1 DataFrame - expected_columns = pd.DataFrame( + expected_columns = DataFrame( [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"] ) @@ -3212,7 +3203,7 @@ def test_concat_axis_parameter(self): tm.assert_series_equal(concatted_0_series, expected_index_series) # Columns/1 Series - expected_columns_series = pd.DataFrame( + expected_columns_series = DataFrame( [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1] ) @@ -3228,7 +3219,7 @@ def test_concat_axis_parameter(self): def test_concat_numerical_names(self): # GH#15262, GH#12223 - df = pd.DataFrame( + df = DataFrame( {"col": range(9)}, dtype="int32", index=( @@ -3238,7 +3229,7 @@ def test_concat_numerical_names(self): ), ) result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :])) - expected = pd.DataFrame( + expected = DataFrame( {"col": [0, 1, 7, 8]}, dtype="int32", index=pd.MultiIndex.from_tuples( @@ -3249,11 +3240,11 @@ def test_concat_numerical_names(self): def test_concat_astype_dup_col(self): # GH#23049 - df = pd.DataFrame([{"a": "b"}]) + df = DataFrame([{"a": "b"}]) df = pd.concat([df, df], axis=1) result = df.astype("category") - expected = pd.DataFrame( + expected = DataFrame( np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"] ).astype("category") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 79879ef346f53..99beff39e8e09 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -15,7 +15,7 @@ def setup_method(self, method): self.var_name = "var" self.value_name = "val" - self.df1 = pd.DataFrame( + self.df1 = DataFrame( [ [1.067683, -1.110463, 0.20867], [-1.321405, 0.368915, -1.055342], @@ -310,7 +310,7 @@ def test_melt_missing_columns_raises(self): # attempted with column names absent from the dataframe # Generate data - df = pd.DataFrame(np.random.randn(5, 4), columns=list("abcd")) + df = DataFrame(np.random.randn(5, 4), columns=list("abcd")) # Try to melt with missing `value_vars` column name msg = "The following '{Var}' are not present in the DataFrame: {Col}" @@ -634,7 +634,7 @@ class TestWideToLong: def test_simple(self): np.random.seed(123) x = np.random.randn(3) - df = pd.DataFrame( + df = DataFrame( { "A1970": {0: "a", 1: "b", 2: "c"}, "A1980": {0: "d", 1: "e", 2: "f"}, @@ -658,7 +658,7 @@ def test_simple(self): def test_stubs(self): # GH9204 - df = pd.DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) + df = DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) df.columns = ["id", "inc1", "inc2", "edu1", "edu2"] stubs = ["inc", "edu"] @@ -671,7 +671,7 @@ def test_separating_character(self): # GH14779 np.random.seed(123) x = np.random.randn(3) - df = pd.DataFrame( + df = DataFrame( { "A.1970": {0: "a", 1: "b", 2: "c"}, "A.1980": {0: "d", 1: "e", 2: "f"}, @@ -696,7 +696,7 @@ def test_separating_character(self): def test_escapable_characters(self): np.random.seed(123) x = np.random.randn(3) - df = pd.DataFrame( + df = DataFrame( { "A(quarterly)1970": {0: "a", 1: "b", 2: "c"}, "A(quarterly)1980": {0: "d", 1: "e", 2: "f"}, @@ -722,7 +722,7 @@ def test_escapable_characters(self): def test_unbalanced(self): # test that we can have a varying amount of time variables - df = pd.DataFrame( + df = DataFrame( { "A2010": [1.0, 2.0], "A2011": [3.0, 4.0], @@ -738,14 +738,14 @@ def test_unbalanced(self): "id": [0, 0, 1, 1], "year": [2010, 2011, 2010, 2011], } - expected = pd.DataFrame(exp_data) + expected = DataFrame(exp_data) expected = expected.set_index(["id", "year"])[["X", "A", "B"]] result = wide_to_long(df, ["A", "B"], i="id", j="year") tm.assert_frame_equal(result, expected) def test_character_overlap(self): # Test we handle overlapping characters in both id_vars and value_vars - df = pd.DataFrame( + df = DataFrame( { "A11": ["a11", "a22", "a33"], "A12": ["a21", "a22", "a23"], @@ -758,7 +758,7 @@ def test_character_overlap(self): } ) df["id"] = df.index - expected = pd.DataFrame( + expected = DataFrame( { "BBBX": [91, 92, 93, 91, 92, 93], "BBBZ": [91, 92, 93, 91, 92, 93], @@ -776,7 +776,7 @@ def test_character_overlap(self): def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = "nope!" - df = pd.DataFrame( + df = DataFrame( { "A2010": [1.0, 2.0], "A2011": [3.0, 4.0], @@ -795,7 +795,7 @@ def test_invalid_separator(self): "A": [], "B": [], } - expected = pd.DataFrame(exp_data).astype({"year": "int"}) + expected = DataFrame(exp_data).astype({"year": "int"}) expected = expected.set_index(["id", "year"])[ ["X", "A2010", "A2011", "B2010", "A", "B"] ] @@ -806,7 +806,7 @@ def test_invalid_separator(self): def test_num_string_disambiguation(self): # Test that we can disambiguate number value_vars from # string value_vars - df = pd.DataFrame( + df = DataFrame( { "A11": ["a11", "a22", "a33"], "A12": ["a21", "a22", "a23"], @@ -819,7 +819,7 @@ def test_num_string_disambiguation(self): } ) df["id"] = df.index - expected = pd.DataFrame( + expected = DataFrame( { "Arating": [91, 92, 93, 91, 92, 93], "Arating_old": [91, 92, 93, 91, 92, 93], @@ -839,7 +839,7 @@ def test_num_string_disambiguation(self): def test_invalid_suffixtype(self): # If all stubs names end with a string, but a numeric suffix is # assumed, an empty data frame is returned - df = pd.DataFrame( + df = DataFrame( { "Aone": [1.0, 2.0], "Atwo": [3.0, 4.0], @@ -858,7 +858,7 @@ def test_invalid_suffixtype(self): "A": [], "B": [], } - expected = pd.DataFrame(exp_data).astype({"year": "int"}) + expected = DataFrame(exp_data).astype({"year": "int"}) expected = expected.set_index(["id", "year"]) expected.index = expected.index.set_levels([0, 1], level=0) @@ -867,7 +867,7 @@ def test_invalid_suffixtype(self): def test_multiple_id_columns(self): # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm - df = pd.DataFrame( + df = DataFrame( { "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], @@ -875,7 +875,7 @@ def test_multiple_id_columns(self): "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], } ) - expected = pd.DataFrame( + expected = DataFrame( { "ht": [ 2.8, @@ -909,7 +909,7 @@ def test_multiple_id_columns(self): def test_non_unique_idvars(self): # GH16382 # Raise an error message if non unique id vars (i) are passed - df = pd.DataFrame( + df = DataFrame( {"A_A1": [1, 2, 3, 4, 5], "B_B1": [1, 2, 3, 4, 5], "x": [1, 1, 1, 1, 1]} ) msg = "the id variables need to uniquely identify each row" @@ -917,7 +917,7 @@ def test_non_unique_idvars(self): wide_to_long(df, ["A_A", "B_B"], i="x", j="colname") def test_cast_j_int(self): - df = pd.DataFrame( + df = DataFrame( { "actor_1": ["CCH Pounder", "Johnny Depp", "Christoph Waltz"], "actor_2": ["Joel David Moore", "Orlando Bloom", "Rory Kinnear"], @@ -927,7 +927,7 @@ def test_cast_j_int(self): } ) - expected = pd.DataFrame( + expected = DataFrame( { "actor": [ "CCH Pounder", @@ -956,7 +956,7 @@ def test_cast_j_int(self): tm.assert_frame_equal(result, expected) def test_identical_stubnames(self): - df = pd.DataFrame( + df = DataFrame( { "A2010": [1.0, 2.0], "A2011": [3.0, 4.0], @@ -969,7 +969,7 @@ def test_identical_stubnames(self): wide_to_long(df, ["A", "B"], i="A", j="colname") def test_nonnumeric_suffix(self): - df = pd.DataFrame( + df = DataFrame( { "treatment_placebo": [1.0, 2.0], "treatment_test": [3.0, 4.0], @@ -977,7 +977,7 @@ def test_nonnumeric_suffix(self): "A": ["X1", "X2"], } ) - expected = pd.DataFrame( + expected = DataFrame( { "A": ["X1", "X1", "X2", "X2"], "colname": ["placebo", "test", "placebo", "test"], @@ -992,7 +992,7 @@ def test_nonnumeric_suffix(self): tm.assert_frame_equal(result, expected) def test_mixed_type_suffix(self): - df = pd.DataFrame( + df = DataFrame( { "A": ["X1", "X2"], "result_1": [0, 9], @@ -1001,7 +1001,7 @@ def test_mixed_type_suffix(self): "treatment_foo": [3.0, 4.0], } ) - expected = pd.DataFrame( + expected = DataFrame( { "A": ["X1", "X2", "X1", "X2"], "colname": ["1", "1", "foo", "foo"], @@ -1015,7 +1015,7 @@ def test_mixed_type_suffix(self): tm.assert_frame_equal(result, expected) def test_float_suffix(self): - df = pd.DataFrame( + df = DataFrame( { "treatment_1.1": [1.0, 2.0], "treatment_2.1": [3.0, 4.0], @@ -1024,7 +1024,7 @@ def test_float_suffix(self): "A": ["X1", "X2"], } ) - expected = pd.DataFrame( + expected = DataFrame( { "A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"], "colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], @@ -1060,8 +1060,8 @@ def test_warn_of_column_name_value(self): # GH34731 # raise a warning if the resultant value column name matches # a name in the dataframe already (default name is "value") - df = pd.DataFrame({"col": list("ABC"), "value": range(10, 16, 2)}) - expected = pd.DataFrame( + df = DataFrame({"col": list("ABC"), "value": range(10, 16, 2)}) + expected = DataFrame( [["A", "col", "A"], ["B", "col", "B"], ["C", "col", "C"]], columns=["value", "variable", "value"], ) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 943a7d0a3cf86..cfe969b5f61bb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -110,7 +110,7 @@ def test_pivot_table(self, observed): def test_pivot_table_categorical_observed_equal(self, observed): # issue #24923 - df = pd.DataFrame( + df = DataFrame( {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]} ) @@ -229,7 +229,7 @@ def test_pivot_table_dropna_categoricals(self, dropna): def test_pivot_with_non_observable_dropna(self, dropna): # gh-21133 - df = pd.DataFrame( + df = DataFrame( { "A": pd.Categorical( [np.nan, "low", "high", "low", "high"], @@ -241,7 +241,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): ) result = df.pivot_table(index="A", values="B", dropna=dropna) - expected = pd.DataFrame( + expected = DataFrame( {"B": [2, 3]}, index=pd.Index( pd.Categorical.from_codes( @@ -254,7 +254,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) # gh-21378 - df = pd.DataFrame( + df = DataFrame( { "A": pd.Categorical( ["left", "low", "high", "low", "high"], @@ -266,7 +266,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): ) result = df.pivot_table(index="A", values="B", dropna=dropna) - expected = pd.DataFrame( + expected = DataFrame( {"B": [2, 3, 0]}, index=pd.Index( pd.Categorical.from_codes( @@ -395,16 +395,14 @@ def test_pivot_no_values(self): idx = pd.DatetimeIndex( ["2011-01-01", "2011-02-01", "2011-01-02", "2011-01-01", "2011-01-02"] ) - df = pd.DataFrame({"A": [1, 2, 3, 4, 5]}, index=idx) + df = DataFrame({"A": [1, 2, 3, 4, 5]}, index=idx) res = df.pivot_table(index=df.index.month, columns=df.index.day) exp_columns = pd.MultiIndex.from_tuples([("A", 1), ("A", 2)]) - exp = pd.DataFrame( - [[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns - ) + exp = DataFrame([[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns) tm.assert_frame_equal(res, exp) - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2, 3, 4, 5], "dt": pd.date_range("2011-01-01", freq="D", periods=5), @@ -416,13 +414,13 @@ def test_pivot_no_values(self): ) exp_columns = pd.MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) exp_columns.names = [None, "dt"] - exp = pd.DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) + exp = DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) tm.assert_frame_equal(res, exp) res = df.pivot_table( index=pd.Grouper(freq="A"), columns=pd.Grouper(key="dt", freq="M") ) - exp = pd.DataFrame( + exp = DataFrame( [3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns ) tm.assert_frame_equal(res, exp) @@ -577,7 +575,7 @@ def test_pivot_with_tz(self, method): def test_pivot_tz_in_values(self): # GH 14948 - df = pd.DataFrame( + df = DataFrame( [ { "uid": "aa", @@ -612,7 +610,7 @@ def test_pivot_tz_in_values(self): columns=[mins], aggfunc=np.min, ) - expected = pd.DataFrame( + expected = DataFrame( [ [ pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"), @@ -714,7 +712,7 @@ def test_pivot_periods_with_margins(self): @pytest.mark.parametrize("method", [True, False]) def test_pivot_with_list_like_values(self, values, method): # issue #17160 - df = pd.DataFrame( + df = DataFrame( { "foo": ["one", "one", "one", "two", "two", "two"], "bar": ["A", "B", "C", "A", "B", "C"], @@ -750,7 +748,7 @@ def test_pivot_with_list_like_values(self, values, method): @pytest.mark.parametrize("method", [True, False]) def test_pivot_with_list_like_values_nans(self, values, method): # issue #17160 - df = pd.DataFrame( + df = DataFrame( { "foo": ["one", "one", "one", "two", "two", "two"], "bar": ["A", "B", "C", "A", "B", "C"], @@ -783,9 +781,7 @@ def test_pivot_with_list_like_values_nans(self, values, method): def test_pivot_columns_none_raise_error(self): # GH 30924 - df = pd.DataFrame( - {"col1": ["a", "b", "c"], "col2": [1, 2, 3], "col3": [1, 2, 3]} - ) + df = DataFrame({"col1": ["a", "b", "c"], "col2": [1, 2, 3], "col3": [1, 2, 3]}) msg = r"pivot\(\) missing 1 required argument: 'columns'" with pytest.raises(TypeError, match=msg): df.pivot(index="col1", values="col3") @@ -835,7 +831,7 @@ def test_pivot_with_multiindex(self, method): @pytest.mark.parametrize("method", [True, False]) def test_pivot_with_tuple_of_values(self, method): # issue #17160 - df = pd.DataFrame( + df = DataFrame( { "foo": ["one", "one", "one", "two", "two", "two"], "bar": ["A", "B", "C", "A", "B", "C"], @@ -941,7 +937,7 @@ def test_margin_with_only_columns_defined( self, columns, aggfunc, values, expected_columns ): # GH 31016 - df = pd.DataFrame( + df = DataFrame( { "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], @@ -962,9 +958,7 @@ def test_margin_with_only_columns_defined( ) result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) - expected = pd.DataFrame( - values, index=Index(["D", "E"]), columns=expected_columns - ) + expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns) tm.assert_frame_equal(result, expected) @@ -1655,9 +1649,7 @@ def test_monthly(self): rng = date_range("1/1/2000", "12/31/2004", freq="M") ts = Series(np.random.randn(len(rng)), index=rng) - annual = pivot_table( - pd.DataFrame(ts), index=ts.index.year, columns=ts.index.month - ) + annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month) annual.columns = annual.columns.droplevel(0) month = ts.index.month @@ -1690,7 +1682,7 @@ def test_pivot_table_with_iterator_values(self): def test_pivot_table_margins_name_with_aggfunc_list(self): # GH 13354 margins_name = "Weekly" - costs = pd.DataFrame( + costs = DataFrame( { "item": ["bacon", "cheese", "bacon", "cheese"], "cost": [2.5, 4.5, 3.2, 3.3], @@ -1714,17 +1706,17 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): ("max", "cost", margins_name), ] cols = pd.MultiIndex.from_tuples(tups, names=[None, None, "day"]) - expected = pd.DataFrame(table.values, index=ix, columns=cols) + expected = DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins(self, observed): # GH 10989 - df = pd.DataFrame( + df = DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) - expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) expected.index = Index([0, 1, "All"], name="y") expected.columns = Index([0, 1, "All"], name="z") @@ -1733,11 +1725,11 @@ def test_categorical_margins(self, observed): @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins_category(self, observed): - df = pd.DataFrame( + df = DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) - expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) expected.index = Index([0, 1, "All"], name="y") expected.columns = Index([0, 1, "All"], name="z") @@ -1748,7 +1740,7 @@ def test_categorical_margins_category(self, observed): def test_margins_casted_to_float(self, observed): # GH 24893 - df = pd.DataFrame( + df = DataFrame( { "A": [2, 4, 6, 8], "B": [1, 4, 5, 8], @@ -1758,7 +1750,7 @@ def test_margins_casted_to_float(self, observed): ) result = pd.pivot_table(df, index="D", margins=True) - expected = pd.DataFrame( + expected = DataFrame( {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, index=pd.Index(["X", "Y", "All"], name="D"), ) @@ -1768,7 +1760,7 @@ def test_pivot_with_categorical(self, observed, ordered): # gh-21370 idx = [np.nan, "low", "high", "low", np.nan] col = [np.nan, "A", "B", np.nan, "A"] - df = pd.DataFrame( + df = DataFrame( { "In": pd.Categorical(idx, categories=["low", "high"], ordered=ordered), "Col": pd.Categorical(col, categories=["A", "B"], ordered=ordered), @@ -1782,9 +1774,7 @@ def test_pivot_with_categorical(self, observed, ordered): expected_cols = pd.CategoricalIndex(["A", "B"], ordered=ordered, name="Col") - expected = pd.DataFrame( - data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols - ) + expected = DataFrame(data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols) expected.index = Index( pd.Categorical( ["low", "high"], categories=["low", "high"], ordered=ordered @@ -1797,7 +1787,7 @@ def test_pivot_with_categorical(self, observed, ordered): # case with columns/value result = df.pivot_table(columns="Col", values="Val", observed=observed) - expected = pd.DataFrame( + expected = DataFrame( data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"]) ) @@ -1805,7 +1795,7 @@ def test_pivot_with_categorical(self, observed, ordered): def test_categorical_aggfunc(self, observed): # GH 9534 - df = pd.DataFrame( + df = DataFrame( {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]} ) df["C1"] = df["C1"].astype("category") @@ -1818,14 +1808,14 @@ def test_categorical_aggfunc(self, observed): ) expected_columns = pd.Index(["a", "b"], name="C2") expected_data = np.array([[1, 0], [1, 0], [0, 2]], dtype=np.int64) - expected = pd.DataFrame( + expected = DataFrame( expected_data, index=expected_index, columns=expected_columns ) tm.assert_frame_equal(result, expected) def test_categorical_pivot_index_ordering(self, observed): # GH 8731 - df = pd.DataFrame( + df = DataFrame( { "Sales": [100, 120, 220], "Month": ["January", "January", "January"], @@ -1859,7 +1849,7 @@ def test_categorical_pivot_index_ordering(self, observed): months, categories=months, ordered=False, name="Month" ) expected_data = [[320, 120]] + [[0, 0]] * 11 - expected = pd.DataFrame( + expected = DataFrame( expected_data, index=expected_index, columns=expected_columns ) if observed: @@ -1898,12 +1888,12 @@ def test_pivot_table_not_series(self): def test_pivot_margins_name_unicode(self): # issue #13292 greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" - frame = pd.DataFrame({"foo": [1, 2, 3]}) + frame = DataFrame({"foo": [1, 2, 3]}) table = pd.pivot_table( frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = pd.Index([1, 2, 3, greek], dtype="object", name="foo") - expected = pd.DataFrame(index=index) + expected = DataFrame(index=index) tm.assert_frame_equal(table, expected) def test_pivot_string_as_func(self): @@ -2001,7 +1991,7 @@ def test_pivot_number_of_levels_larger_than_int32(self): def test_pivot_table_aggfunc_dropna(self, dropna): # GH 22159 - df = pd.DataFrame( + df = DataFrame( { "fruit": ["apple", "peach", "apple"], "size": [1, 1, 2], @@ -2027,7 +2017,7 @@ def ret_none(x): [["ret_sum", "ret_none", "ret_one"], ["apple", "peach"]], names=[None, "fruit"], ) - expected = pd.DataFrame(data, index=["size", "taste"], columns=col) + expected = DataFrame(data, index=["size", "taste"], columns=col) if dropna: expected = expected.dropna(axis="columns") @@ -2036,7 +2026,7 @@ def ret_none(x): def test_pivot_table_aggfunc_scalar_dropna(self, dropna): # GH 22159 - df = pd.DataFrame( + df = DataFrame( {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]} ) @@ -2044,7 +2034,7 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna): data = [[2.5, np.nan], [1, np.nan]] col = pd.Index(["one", "two"], name="A") - expected = pd.DataFrame(data, index=["x", "y"], columns=col) + expected = DataFrame(data, index=["x", "y"], columns=col) if dropna: expected = expected.dropna(axis="columns") @@ -2053,7 +2043,7 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna): def test_pivot_table_empty_aggfunc(self): # GH 9186 - df = pd.DataFrame( + df = DataFrame( { "A": [2, 2, 3, 3, 2], "id": [5, 6, 7, 8, 9], @@ -2062,7 +2052,7 @@ def test_pivot_table_empty_aggfunc(self): } ) result = df.pivot_table(index="A", columns="D", values="id", aggfunc=np.size) - expected = pd.DataFrame() + expected = DataFrame() tm.assert_frame_equal(result, expected) def test_pivot_table_no_column_raises(self): @@ -2070,8 +2060,6 @@ def test_pivot_table_no_column_raises(self): def agg(l): return np.mean(l) - foo = pd.DataFrame( - {"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]} - ) + foo = DataFrame({"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]}) with pytest.raises(KeyError, match="notpresent"): foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 61ebd2fcb3a27..2627e8b8608a9 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -156,7 +156,7 @@ def f(x): def test_apply_dict_depr(self): - tsdf = pd.DataFrame( + tsdf = DataFrame( np.random.randn(10, 3), columns=["A", "B", "C"], index=pd.date_range("1/1/2000", periods=10), @@ -566,7 +566,7 @@ def test_map_dict_with_tuple_keys(self): from being mapped properly. """ # GH 18496 - df = pd.DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) + df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} df["labels"] = df["a"].map(label_mappings) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 2e3d67786afdc..392f352711210 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -109,7 +109,7 @@ def test_slicing_datetimes(): tm.assert_frame_equal(result, expected) # duplicates - df = pd.DataFrame( + df = DataFrame( np.arange(5.0, dtype="float64"), index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]], ) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 3d927a80a157c..8c53ed85a20b3 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -263,7 +263,7 @@ def test_setitem_ambiguous_keyerror(): def test_getitem_dataframe(): rng = list(range(10)) s = Series(10, index=rng) - df = pd.DataFrame(rng, index=rng) + df = DataFrame(rng, index=rng) msg = ( "Indexing a Series with DataFrame is not supported, " "use the appropriate DataFrame column" diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index e1d0bced55d98..4c2bf4683d17d 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -63,7 +63,7 @@ def test_append_tuples(self): def test_append_dataframe_raises(self): # GH 31413 - df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) + df = DataFrame({"A": [1, 2], "B": [3, 4]}) msg = "to_append should be a Series or list/tuple of Series, got DataFrame" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index d8099e84a324d..38955ea7f06c4 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -73,7 +73,7 @@ def test_unstack_tuplename_in_multiindex(): ser = Series(1, index=idx) result = ser.unstack(("A", "a")) - expected = pd.DataFrame( + expected = DataFrame( [[1, 1, 1], [1, 1, 1], [1, 1, 1]], columns=pd.MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]), index=pd.Index([1, 2, 3], name=("B", "b")), @@ -112,7 +112,7 @@ def test_unstack_mixed_type_name_in_multiindex( ser = Series(1, index=idx) result = ser.unstack(unstack_idx) - expected = pd.DataFrame( + expected = DataFrame( expected_values, columns=expected_columns, index=expected_index ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 491b3a62b7d73..1ca639e85d913 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -712,7 +712,7 @@ def test_constructor_datelike_coercion(self): wing1 = "2T15 4H19".split() wing2 = "416 4T20".split() mat = pd.to_datetime("2016-01-22 2019-09-07".split()) - df = pd.DataFrame({"wing1": wing1, "wing2": wing2, "mat": mat}, index=belly) + df = DataFrame({"wing1": wing1, "wing2": wing2, "mat": mat}, index=belly) result = df.loc["3T19"] assert result.dtype == object diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index df7ea46dc4f86..08bb24a01b088 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -449,7 +449,7 @@ def test_logical_ops_df_compat(self): tm.assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame()) tm.assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame()) - exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD")) + exp = DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD")) tm.assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame()) tm.assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame()) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index c4cd12fcbdf3b..31c0e7f54d12b 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -82,7 +82,7 @@ def f(x): def test_asfreq_resample_set_correct_freq(self): # GH5613 # we test if .asfreq() and .resample() set the correct value for .freq - df = pd.DataFrame( + df = DataFrame( {"date": ["2012-01-01", "2012-01-02", "2012-01-03"], "col": [1, 2, 3]} ) df = df.set_index(pd.to_datetime(df.date)) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9c29d3a062dfa..810d98fd5bb89 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -375,11 +375,11 @@ def test_unstack_partial( # https://github.com/pandas-dev/pandas/issues/19351 # make sure DataFrame.unstack() works when its run on a subset of the DataFrame # and the Index levels contain values that are not present in the subset - result = pd.DataFrame(result_rows, columns=result_columns).set_index( + result = DataFrame(result_rows, columns=result_columns).set_index( ["ix1", "ix2"] ) result = result.iloc[1:2].unstack("ix2") - expected = pd.DataFrame( + expected = DataFrame( [expected_row], columns=pd.MultiIndex.from_product( [result_columns[2:], [index_product]], names=[None, "ix2"] @@ -925,7 +925,7 @@ def test_stack_unstack_unordered_multiindex(self): [f"a{x}" for x in values], # a0, a1, .. ] ) - df = pd.DataFrame(data.T, columns=["b", "a"]) + df = DataFrame(data.T, columns=["b", "a"]) df.columns.name = "first" second_level_dict = {"x": df} multi_level_df = pd.concat(second_level_dict, axis=1) @@ -1919,7 +1919,7 @@ def test_multilevel_index_loc_order(self, dim, keys, expected): # GH 22797 # Try to respect order of keys given for MultiIndex.loc kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]} - df = pd.DataFrame(np.arange(25).reshape(5, 5), **kwargs) + df = DataFrame(np.arange(25).reshape(5, 5), **kwargs) exp_index = MultiIndex.from_arrays(expected) if dim == "index": res = df.loc[keys, :] diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a070d45089f96..7ee4b86fb4049 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1418,7 +1418,7 @@ def test_dataframe_dtypes(self, cache): def test_dataframe_utc_true(self): # GH 23760 - df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) result = pd.to_datetime(df, utc=True) expected = Series( np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]") diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 5174ff005b5fb..6111797d70268 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -265,14 +265,14 @@ def test_assert_frame_equal_interval_dtype_mismatch(): @pytest.mark.parametrize("right_dtype", ["Int32", "int64"]) def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype): # https://github.com/pandas-dev/pandas/issues/35715 - left = pd.DataFrame({"a": [1, 2, 3]}, dtype="Int64") - right = pd.DataFrame({"a": [1, 2, 3]}, dtype=right_dtype) + left = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + right = DataFrame({"a": [1, 2, 3]}, dtype=right_dtype) tm.assert_frame_equal(left, right, check_dtype=False) def test_allows_duplicate_labels(): - left = pd.DataFrame() - right = pd.DataFrame().set_flags(allows_duplicate_labels=False) + left = DataFrame() + right = DataFrame().set_flags(allows_duplicate_labels=False) tm.assert_frame_equal(left, left) tm.assert_frame_equal(right, right) tm.assert_frame_equal(left, right, check_flags=False) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index f761b6b4ffd7a..cf618f7c828aa 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -300,19 +300,19 @@ def test_hash_with_tuple(): # GH#28969 array containing a tuple raises on call to arr.astype(str) # apparently a numpy bug github.com/numpy/numpy/issues/9441 - df = pd.DataFrame({"data": [tuple("1"), tuple("2")]}) + df = DataFrame({"data": [tuple("1"), tuple("2")]}) result = hash_pandas_object(df) expected = Series([10345501319357378243, 8331063931016360761], dtype=np.uint64) tm.assert_series_equal(result, expected) - df2 = pd.DataFrame({"data": [tuple([1]), tuple([2])]}) + df2 = DataFrame({"data": [tuple([1]), tuple([2])]}) result = hash_pandas_object(df2) expected = Series([9408946347443669104, 3278256261030523334], dtype=np.uint64) tm.assert_series_equal(result, expected) # require that the elements of such tuples are themselves hashable - df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]}) + df3 = DataFrame({"data": [tuple([1, []]), tuple([2, {}])]}) with pytest.raises(TypeError, match="unhashable type: 'list'"): hash_pandas_object(df3) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index eb14ecfba1f51..6e5d7b4df00e1 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -305,7 +305,7 @@ def test_preserve_metadata(): ) def test_multiple_agg_funcs(func, window_size, expected_vals): # GH 15072 - df = pd.DataFrame( + df = DataFrame( [ ["A", 10, 20], ["A", 20, 30], @@ -331,7 +331,7 @@ def test_multiple_agg_funcs(func, window_size, expected_vals): columns = pd.MultiIndex.from_tuples( [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] ) - expected = pd.DataFrame(expected_vals, index=index, columns=columns) + expected = DataFrame(expected_vals, index=index, columns=columns) result = window.agg(dict((("low", ["mean", "max"]), ("high", ["mean", "min"])))) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 3dc1974685226..183d2814920e4 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -235,7 +235,7 @@ def test_iter_expanding_series(ser, expected, min_periods): def test_center_deprecate_warning(): # GH 20647 - df = pd.DataFrame() + df = DataFrame() with tm.assert_produces_warning(FutureWarning): df.expanding(center=True) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index fbdf8c775530a..101d65c885c9b 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -131,7 +131,7 @@ def test_rolling_apply(self, raw): def test_rolling_apply_mutability(self): # GH 14013 - df = pd.DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6}) + df = DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6}) g = df.groupby("A") mi = pd.MultiIndex.from_tuples( @@ -140,7 +140,7 @@ def test_rolling_apply_mutability(self): mi.names = ["A", None] # Grouped column should not be a part of the output - expected = pd.DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi) + expected = DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi) result = g.rolling(window=2).sum() tm.assert_frame_equal(result, expected) @@ -221,7 +221,7 @@ def test_groupby_rolling(self, expected_value, raw_value): def foo(x): return int(isinstance(x, np.ndarray)) - df = pd.DataFrame({"id": [1, 1, 1], "value": [1, 2, 3]}) + df = DataFrame({"id": [1, 1, 1], "value": [1, 2, 3]}) result = df.groupby("id").value.rolling(1).apply(foo, raw=raw_value) expected = Series( [expected_value] * 3, @@ -250,9 +250,9 @@ def test_groupby_rolling_center_center(self): ) tm.assert_series_equal(result, expected) - df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)}) + df = DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)}) result = df.groupby("a").rolling(center=True, window=3).mean() - expected = pd.DataFrame( + expected = DataFrame( [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan], index=pd.MultiIndex.from_tuples( ( @@ -274,9 +274,9 @@ def test_groupby_rolling_center_center(self): ) tm.assert_frame_equal(result, expected) - df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)}) + df = DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)}) result = df.groupby("a").rolling(center=True, window=3).mean() - expected = pd.DataFrame( + expected = DataFrame( [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan], index=pd.MultiIndex.from_tuples( ( @@ -299,7 +299,7 @@ def test_groupby_rolling_center_center(self): def test_groupby_rolling_center_on(self): # GH 37141 - df = pd.DataFrame( + df = DataFrame( data={ "Date": pd.date_range("2020-01-01", "2020-01-10"), "gb": ["group_1"] * 6 + ["group_2"] * 4, @@ -335,7 +335,7 @@ def test_groupby_rolling_center_on(self): @pytest.mark.parametrize("min_periods", [5, 4, 3]) def test_groupby_rolling_center_min_periods(self, min_periods): # GH 36040 - df = pd.DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) + df = DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) window_size = 5 result = ( @@ -353,7 +353,7 @@ def test_groupby_rolling_center_min_periods(self, min_periods): grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans - expected = pd.DataFrame( + expected = DataFrame( {"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected} ) @@ -396,7 +396,7 @@ def get_window_bounds( start[start < 0] = min_periods return start, end - df = pd.DataFrame( + df = DataFrame( {"a": [1.0, 2.0, 3.0, 4.0, 5.0] * 3}, index=[0] * 5 + [1] * 5 + [2] * 5 ) result = ( @@ -409,7 +409,7 @@ def get_window_bounds( def test_groupby_rolling_subset_with_closed(self): # GH 35549 - df = pd.DataFrame( + df = DataFrame( { "column1": range(6), "column2": range(6), @@ -433,7 +433,7 @@ def test_groupby_rolling_subset_with_closed(self): def test_groupby_subset_rolling_subset_with_closed(self): # GH 35549 - df = pd.DataFrame( + df = DataFrame( { "column1": range(6), "column2": range(6), @@ -481,19 +481,19 @@ def test_groupby_rolling_index_changed(self, func): def test_groupby_rolling_empty_frame(self): # GH 36197 - expected = pd.DataFrame({"s1": []}) + expected = DataFrame({"s1": []}) result = expected.groupby("s1").rolling(window=1).sum() expected.index = pd.MultiIndex.from_tuples([], names=["s1", None]) tm.assert_frame_equal(result, expected) - expected = pd.DataFrame({"s1": [], "s2": []}) + expected = DataFrame({"s1": [], "s2": []}) result = expected.groupby(["s1", "s2"]).rolling(window=1).sum() expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None]) tm.assert_frame_equal(result, expected) def test_groupby_rolling_string_index(self): # GH: 36727 - df = pd.DataFrame( + df = DataFrame( [ ["A", "group_1", pd.Timestamp(2019, 1, 1, 9)], ["B", "group_1", pd.Timestamp(2019, 1, 2, 9)], @@ -508,7 +508,7 @@ def test_groupby_rolling_string_index(self): df["count_to_date"] = groups.cumcount() rolling_groups = groups.rolling("10d", on="eventTime") result = rolling_groups.apply(lambda df: df.shape[0]) - expected = pd.DataFrame( + expected = DataFrame( [ ["A", "group_1", pd.Timestamp(2019, 1, 1, 9), 1.0], ["B", "group_1", pd.Timestamp(2019, 1, 2, 9), 2.0], @@ -523,12 +523,12 @@ def test_groupby_rolling_string_index(self): def test_groupby_rolling_no_sort(self): # GH 36889 result = ( - pd.DataFrame({"foo": [2, 1], "bar": [2, 1]}) + DataFrame({"foo": [2, 1], "bar": [2, 1]}) .groupby("foo", sort=False) .rolling(1) .min() ) - expected = pd.DataFrame( + expected = DataFrame( np.array([[2.0, 2.0], [1.0, 1.0]]), columns=["foo", "bar"], index=pd.MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]), @@ -537,7 +537,7 @@ def test_groupby_rolling_no_sort(self): def test_groupby_rolling_count_closed_on(self): # GH 35869 - df = pd.DataFrame( + df = DataFrame( { "column1": range(6), "column2": range(6), @@ -573,11 +573,11 @@ def test_groupby_rolling_count_closed_on(self): ) def test_groupby_rolling_sem(self, func, kwargs): # GH: 26476 - df = pd.DataFrame( + df = DataFrame( [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"] ) result = getattr(df.groupby("a"), func)(**kwargs).sem() - expected = pd.DataFrame( + expected = DataFrame( {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]}, index=pd.MultiIndex.from_tuples( [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] @@ -590,7 +590,7 @@ def test_groupby_rolling_sem(self, func, kwargs): ) def test_groupby_rolling_nans_in_index(self, rollings, key): # GH: 34617 - df = pd.DataFrame( + df = DataFrame( { "a": pd.to_datetime(["2020-06-01 12:00", "2020-06-01 14:00", np.nan]), "b": [1, 2, 3], diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 312b30e4491a6..048f7b8287176 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -155,7 +155,7 @@ def test_closed_one_entry(func): @pytest.mark.parametrize("func", ["min", "max"]) def test_closed_one_entry_groupby(func): # GH24718 - ser = pd.DataFrame( + ser = DataFrame( data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=pd.date_range("2000", periods=3) ) result = getattr( @@ -355,14 +355,14 @@ def test_readonly_array(): def test_rolling_datetime(axis_frame, tz_naive_fixture): # GH-28192 tz = tz_naive_fixture - df = pd.DataFrame( + df = DataFrame( {i: [1] * 2 for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} ) if axis_frame in [0, "index"]: result = df.T.rolling("2D", axis=axis_frame).sum().T else: result = df.rolling("2D", axis=axis_frame).sum() - expected = pd.DataFrame( + expected = DataFrame( { **{ i: [1.0] * 2 @@ -438,7 +438,7 @@ def test_rolling_window_as_string(): def test_min_periods1(): # GH#6795 - df = pd.DataFrame([0, 1, 2, 1, 0], columns=["a"]) + df = DataFrame([0, 1, 2, 1, 0], columns=["a"]) result = df["a"].rolling(3, center=True, min_periods=1).max() expected = Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") tm.assert_series_equal(result, expected) @@ -706,7 +706,7 @@ def scaled_sum(*args): @pytest.mark.parametrize("add", [0.0, 2.0]) def test_rolling_numerical_accuracy_kahan_mean(add): # GH: 36031 implementing kahan summation - df = pd.DataFrame( + df = DataFrame( {"A": [3002399751580331.0 + add, -0.0, -0.0]}, index=[ pd.Timestamp("19700101 09:00:00"), @@ -718,7 +718,7 @@ def test_rolling_numerical_accuracy_kahan_mean(add): df.resample("1s").ffill().rolling("3s", closed="left", min_periods=3).mean() ) dates = pd.date_range("19700101 09:00:00", periods=7, freq="S") - expected = pd.DataFrame( + expected = DataFrame( { "A": [ np.nan, @@ -737,7 +737,7 @@ def test_rolling_numerical_accuracy_kahan_mean(add): def test_rolling_numerical_accuracy_kahan_sum(): # GH: 13254 - df = pd.DataFrame([2.186, -1.647, 0.0, 0.0, 0.0, 0.0], columns=["x"]) + df = DataFrame([2.186, -1.647, 0.0, 0.0, 0.0, 0.0], columns=["x"]) result = df["x"].rolling(3).sum() expected = Series([np.nan, np.nan, 0.539, -1.647, 0.0, 0.0], name="x") tm.assert_series_equal(result, expected) @@ -750,7 +750,7 @@ def test_rolling_numerical_accuracy_jump(): ) data = np.random.rand(len(index)) - df = pd.DataFrame({"data": data}, index=index) + df = DataFrame({"data": data}, index=index) result = df.rolling("60s").mean() tm.assert_frame_equal(result, df[["data"]]) @@ -784,10 +784,10 @@ def test_rolling_numerical_too_large_numbers(): ) def test_rolling_mixed_dtypes_axis_1(func, value): # GH: 20649 - df = pd.DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) + df = DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) df["c"] = 1.0 result = getattr(df.rolling(window=2, min_periods=1, axis=1), func)() - expected = pd.DataFrame( + expected = DataFrame( {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, index=[1, 2] ) tm.assert_frame_equal(result, expected) @@ -795,7 +795,7 @@ def test_rolling_mixed_dtypes_axis_1(func, value): def test_rolling_axis_one_with_nan(): # GH: 35596 - df = pd.DataFrame( + df = DataFrame( [ [0, 1, 2, 4, np.nan, np.nan, np.nan], [0, 1, 2, np.nan, np.nan, np.nan, np.nan], @@ -803,7 +803,7 @@ def test_rolling_axis_one_with_nan(): ] ) result = df.rolling(window=7, min_periods=1, axis="columns").sum() - expected = pd.DataFrame( + expected = DataFrame( [ [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], @@ -819,17 +819,17 @@ def test_rolling_axis_one_with_nan(): ) def test_rolling_axis_1_non_numeric_dtypes(value): # GH: 20649 - df = pd.DataFrame({"a": [1, 2]}) + df = DataFrame({"a": [1, 2]}) df["b"] = value result = df.rolling(window=2, min_periods=1, axis=1).sum() - expected = pd.DataFrame({"a": [1.0, 2.0]}) + expected = DataFrame({"a": [1.0, 2.0]}) tm.assert_frame_equal(result, expected) def test_rolling_on_df_transposed(): # GH: 32724 - df = pd.DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]}) - expected = pd.DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]}) + df = DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]}) + expected = DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]}) result = df.rolling(min_periods=1, window=2, axis=1).sum() tm.assert_frame_equal(result, expected)