diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index a861e0eb52391..b1620df91ba26 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2433,6 +2433,24 @@ def test_datetime_date_tuple_columns_from_dict(self):
         expected = DataFrame([0, 1, 2], columns=pd.Index(pd.Series([tup])))
         tm.assert_frame_equal(result, expected)
 
+    def test_construct_with_two_categoricalindex_series(self):
+        # GH 14600: columns are the union of both indexes; absent labels give NaN
+        s1 = pd.Series(
+            [39, 6, 4], index=pd.CategoricalIndex(["female", "male", "unknown"])
+        )
+        s2 = pd.Series(
+            [2, 152, 2, 242, 150],
+            index=pd.CategoricalIndex(["f", "female", "m", "male", "unknown"]),
+        )
+        result = pd.DataFrame([s1, s2])
+        expected = pd.DataFrame(
+            np.array(
+                [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]]
+            ),
+            columns=["f", "female", "m", "male", "unknown"],
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameConstructorWithDatetimeTZ:
     def test_from_dict(self):
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 708d3429285a8..fc7b9f56002d8 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -792,3 +792,22 @@ def test_apply_multi_level_name(category):
     )
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
+
+
+def test_groupby_apply_datetime_result_dtypes():
+    # GH 14849: apply() returning the first row per group keeps column dtypes
+    data = pd.DataFrame.from_records(
+        [
+            (pd.Timestamp(2016, 1, 1), "red", "dark", 1, "8"),
+            (pd.Timestamp(2015, 1, 1), "green", "stormy", 2, "9"),
+            (pd.Timestamp(2014, 1, 1), "blue", "bright", 3, "10"),
+            (pd.Timestamp(2013, 1, 1), "blue", "calm", 4, "potato"),
+        ],
+        columns=["observation", "color", "mood", "intensity", "score"],
+    )
+    result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
+    expected = Series(
+        [np.dtype("datetime64[ns]"), object, object, np.int64, object],
+        index=["observation", "color", "mood", "intensity", "score"],
+    )
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 7e374811d1960..eb9552fbbebc1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1952,6 +1952,13 @@ def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected):
     tm.assert_frame_equal(result, expected)
 
 
+def test_ffill_missing_arguments():
+    # GH 14955: groupby fillna() called with no arguments must raise ValueError
+    df = pd.DataFrame({"a": [1, 2], "b": [1, 1]})
+    with pytest.raises(ValueError, match="Must specify a fill"):
+        df.groupby("b").fillna()
+
+
 def test_groupby_only_none_group():
     # see GH21624
     # this was crashing with "ValueError: Length of passed values is 1, index implies 0"
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 78fcd15ab4cc1..4c1436b800fc3 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1002,3 +1002,13 @@ def test_loc_axis_1_slice():
         ),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_loc_set_dataframe_multiindex():
+    # GH 14592: writing a .loc selection back onto itself must change nothing
+    expected = pd.DataFrame(
+        "a", index=range(2), columns=pd.MultiIndex.from_product([range(2), range(2)])
+    )
+    result = expected.copy()
+    result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]]
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 3d427dde573af..22c4e38206df6 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -11,6 +11,7 @@
 3. Move the created pickle to "data/legacy_pickle/" directory.
 """
 import bz2
+import datetime
 import glob
 import gzip
 import os
@@ -487,3 +488,17 @@ def open(self, *args):
         df.to_pickle(mockurl)
         result = pd.read_pickle(mockurl)
         tm.assert_frame_equal(df, result)
+
+
+class MyTz(datetime.tzinfo):
+    def __init__(self):
+        pass
+
+
+def test_read_pickle_with_subclass():
+    # GH 12163: a tzinfo subclass instance must survive a pickle round trip
+    expected = pd.Series(dtype=object), MyTz()
+    result = tm.round_trip_pickle(expected)
+
+    tm.assert_series_equal(result[0], expected[0])
+    assert isinstance(result[1], MyTz)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index d760939657d47..2651c3d73c9ab 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1115,6 +1115,15 @@ def create_data(constructor):
         tm.assert_series_equal(result_datetime, expected)
         tm.assert_series_equal(result_Timestamp, expected)
 
+    def test_contructor_dict_tuple_indexer(self):
+        # GH 12948: None inside a tuple key maps to a missing (-1) MultiIndex code
+        data = {(1, 1, None): -1.0}
+        result = Series(data)
+        expected = Series(
+            -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]])
+        )
+        tm.assert_series_equal(result, expected)
+
     def test_constructor_mapping(self, non_mapping_dict_subclass):
         # GH 29788
         ndm = non_mapping_dict_subclass({3: "three"})
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 5382ad84bcca2..1adc5011a0c31 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -2147,6 +2147,40 @@ def test_sort_index_level_mixed(self):
             sorted_after.drop([("foo", "three")], axis=1),
         )
 
+    def test_sort_index_categorical_multiindex(self):
+        # GH 15058: sort_index must honour the categorical order c < a < b
+        df = DataFrame(
+            {
+                "a": range(6),
+                "l1": pd.Categorical(
+                    ["a", "a", "b", "b", "c", "c"],
+                    categories=["c", "a", "b"],
+                    ordered=True,
+                ),
+                "l2": [0, 1, 0, 1, 0, 1],
+            }
+        )
+        result = df.set_index(["l1", "l2"]).sort_index()
+        expected = DataFrame(
+            [4, 5, 0, 1, 2, 3],
+            columns=["a"],
+            index=MultiIndex(
+                levels=[
+                    pd.CategoricalIndex(
+                        ["c", "a", "b"],
+                        categories=["c", "a", "b"],
+                        ordered=True,
+                        name="l1",
+                        dtype="category",
+                    ),
+                    [0, 1],
+                ],
+                codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
+                names=["l1", "l2"],
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_is_lexsorted(self):
         levels = [[0, 1], [0, 1, 2]]
 