diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py similarity index 53% rename from pandas/tests/frame/test_dtypes.py rename to pandas/tests/frame/methods/test_dtypes.py index 1add4c0db2e53..0105eef435121 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -1,7 +1,6 @@ from datetime import timedelta import numpy as np -import pytest from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -89,16 +88,7 @@ def test_dtypes_gh8722(self, float_string_frame): result = df.dtypes tm.assert_series_equal(result, Series({0: np.dtype("int64")})) - def test_singlerow_slice_categoricaldtype_gives_series(self): - # GH29521 - df = DataFrame({"x": pd.Categorical("a b c d e".split())}) - result = df.iloc[0] - raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"]) - expected = Series(raw_cat, index=["x"], name=0, dtype="category") - - tm.assert_series_equal(result, expected) - - def test_timedeltas(self): + def test_dtypes_timedeltas(self): df = DataFrame( dict( A=Series(date_range("2012-1-1", periods=3, freq="D")), @@ -136,95 +126,3 @@ def test_timedeltas(self): index=list("ABCD"), ) tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "input_vals", - [ - ([1, 2]), - (["1", "2"]), - (list(pd.date_range("1/1/2011", periods=2, freq="H"))), - (list(pd.date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), - ([pd.Interval(left=0, right=5)]), - ], - ) - def test_constructor_list_str(self, input_vals, string_dtype): - # GH 16605 - # Ensure that data elements are converted to strings when - # dtype is str, 'str', or 'U' - - result = DataFrame({"A": input_vals}, dtype=string_dtype) - expected = DataFrame({"A": input_vals}).astype({"A": string_dtype}) - tm.assert_frame_equal(result, expected) - - def test_constructor_list_str_na(self, string_dtype): - - result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype) - expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "data, expected", - [ - # empty - (DataFrame(), True), - # multi-same - (DataFrame({"A": [1, 2], "B": [1, 2]}), True), - # multi-object - ( - DataFrame( - { - "A": np.array([1, 2], dtype=object), - "B": np.array(["a", "b"], dtype=object), - } - ), - True, - ), - # multi-extension - ( - DataFrame( - {"A": pd.Categorical(["a", "b"]), "B": pd.Categorical(["a", "b"])} - ), - True, - ), - # differ types - (DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False), - # differ sizes - ( - DataFrame( - { - "A": np.array([1, 2], dtype=np.int32), - "B": np.array([1, 2], dtype=np.int64), - } - ), - False, - ), - # multi-extension differ - ( - DataFrame( - {"A": pd.Categorical(["a", "b"]), "B": pd.Categorical(["b", "c"])} - ), - False, - ), - ], - ) - def test_is_homogeneous_type(self, data, expected): - assert data._is_homogeneous_type is expected - - def test_asarray_homogenous(self): - df = DataFrame({"A": pd.Categorical([1, 2]), "B": pd.Categorical([1, 2])}) - result = np.asarray(df) - # may change from object in the future - expected = np.array([[1, 1], [2, 2]], dtype="object") - tm.assert_numpy_array_equal(result, expected) - - def test_str_to_small_float_conversion_type(self): - # GH 20388 - np.random.seed(13) - col_data = [str(np.random.random() * 1e-12) for _ in range(5)] - result = DataFrame(col_data, columns=["A"]) - expected = DataFrame(col_data, columns=["A"], dtype=object) - tm.assert_frame_equal(result, expected) - # change the dtype of the elements from object to float one by one - result.loc[result.index, "A"] = [float(x) for x in col_data] - expected = DataFrame(col_data, columns=["A"], dtype=float) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py new file mode 100644 index 0000000000000..0fca4e988b775 --- /dev/null +++ b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py @@ -0,0 +1,49 @@ +import numpy as np +import pytest + +from pandas import Categorical, DataFrame + + +@pytest.mark.parametrize( + "data, expected", + [ + # empty + (DataFrame(), True), + # multi-same + (DataFrame({"A": [1, 2], "B": [1, 2]}), True), + # multi-object + ( + DataFrame( + { + "A": np.array([1, 2], dtype=object), + "B": np.array(["a", "b"], dtype=object), + } + ), + True, + ), + # multi-extension + ( + DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["a", "b"])}), + True, + ), + # differ types + (DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False), + # differ sizes + ( + DataFrame( + { + "A": np.array([1, 2], dtype=np.int32), + "B": np.array([1, 2], dtype=np.int64), + } + ), + False, + ), + # multi-extension differ + ( + DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["b", "c"])}), + False, + ), + ], +) +def test_is_homogeneous_type(data, expected): + assert data._is_homogeneous_type is expected diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1521f66a6bc61..bbcc286d89986 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2697,6 +2697,31 @@ def test_frame_ctor_datetime64_column(self): df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) + @pytest.mark.parametrize( + "input_vals", + [ + ([1, 2]), + (["1", "2"]), + (list(date_range("1/1/2011", periods=2, freq="H"))), + (list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), + ([pd.Interval(left=0, right=5)]), + ], + ) + def test_constructor_list_str(self, input_vals, string_dtype): + # GH#16605 + # Ensure that data elements are converted to strings when + # dtype is str, 'str', or 'U' + + result = DataFrame({"A": input_vals}, dtype=string_dtype) + expected = DataFrame({"A": input_vals}).astype({"A": string_dtype}) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_str_na(self, string_dtype): + + result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype) + expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ: def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py new file mode 100644 index 0000000000000..a3b4c659a4124 --- /dev/null +++ b/pandas/tests/frame/test_npfuncs.py @@ -0,0 +1,16 @@ +""" +Tests for np.foo applied to DataFrame, not necessarily ufuncs. +""" +import numpy as np + +from pandas import Categorical, DataFrame +import pandas._testing as tm + + +class TestAsArray: + def test_asarray_homogenous(self): + df = DataFrame({"A": Categorical([1, 2]), "B": Categorical([1, 2])}) + result = np.asarray(df) + # may change from object in the future + expected = np.array([[1, 1], [2, 2]], dtype="object") + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 31abe45215432..4ef6463fd9e31 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -739,6 +739,15 @@ def test_iloc_with_boolean_operation(self): expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]]) tm.assert_frame_equal(result, expected) + def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): + # GH#29521 + df = DataFrame({"x": pd.Categorical("a b c d e".split())}) + result = df.iloc[0] + raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"]) + expected = Series(raw_cat, index=["x"], name=0, dtype="category") + + tm.assert_series_equal(result, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3b915f13c7568..dd9657ad65ce7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -978,6 +978,19 @@ def test_loc_reverse_assignment(self): tm.assert_series_equal(result, expected) + def test_loc_setitem_str_to_small_float_conversion_type(self): + # GH#20388 + np.random.seed(13) + col_data = [str(np.random.random() * 1e-12) for _ in range(5)] + result = DataFrame(col_data, columns=["A"]) + expected = DataFrame(col_data, columns=["A"], dtype=object) + tm.assert_frame_equal(result, expected) + + # change the dtype of the elements from object to float one by one + result.loc[result.index, "A"] = [float(x) for x in col_data] + expected = DataFrame(col_data, columns=["A"], dtype=float) + tm.assert_frame_equal(result, expected) + class TestLocWithMultiIndex: @pytest.mark.parametrize(