Skip to content

Commit 6c3a091

Browse files
jbrockmendel authored and Kevin D Smith committed
TST/REF: collect tests by method (pandas-dev#37403)
1 parent ed337d9 commit 6c3a091

11 files changed

+332
-316
lines changed

pandas/tests/arrays/categorical/test_indexing.py

+16
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,22 @@ def test_setitem_tuple(self):
9191
cat[1] = cat[0]
9292
assert cat[1] == (0, 1)
9393

94+
def test_setitem_listlike(self):
95+
96+
# GH#9469
97+
# properly coerce the input indexers
98+
np.random.seed(1)
99+
c = Categorical(
100+
np.random.randint(0, 5, size=150000).astype(np.int8)
101+
).add_categories([-1000])
102+
indexer = np.array([100000]).astype(np.int64)
103+
c[indexer] = -1000
104+
105+
# we are asserting the code result here
106+
# which maps to the -1000 category
107+
result = c.codes[np.array([100000]).astype(np.int64)]
108+
tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))
109+
94110

95111
class TestCategoricalIndexing:
96112
def test_getitem_slice(self):

pandas/tests/frame/methods/test_reindex.py

+102-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,18 @@
55
import pytest
66

77
import pandas as pd
8-
from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna
8+
from pandas import (
9+
Categorical,
10+
CategoricalIndex,
11+
DataFrame,
12+
Index,
13+
MultiIndex,
14+
Series,
15+
date_range,
16+
isna,
17+
)
918
import pandas._testing as tm
19+
from pandas.api.types import CategoricalDtype as CDT
1020
import pandas.core.common as com
1121

1222

@@ -745,3 +755,94 @@ def test_reindex_multi_categorical_time(self):
745755
result = df2.reindex(midx)
746756
expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
747757
tm.assert_frame_equal(result, expected)
758+
759+
def test_reindex_with_categoricalindex(self):
    """Exercise DataFrame.reindex on a frame indexed by a CategoricalIndex.

    Covers three groups of behavior:

    * reindexing with a plain list is compared against a frame whose index
      is built from an ordinary Series of labels;
    * reindexing with a Categorical is compared against a frame whose index
      carries the dtype of the Categorical that was passed in;
    * duplicate source labels and the ``method``/``level``/``limit``
      arguments are expected to raise.
    """
    cats = list("cabe")
    df = DataFrame(
        {
            "A": np.arange(3, dtype="int64"),
        },
        index=CategoricalIndex(list("abc"), dtype=CDT(cats), name="B"),
    )

    def check(indexer, a_values, b_labels):
        # Build the expected frame from column "A" and index labels "B",
        # then compare it (including index type) with the reindexed frame.
        result = df.reindex(indexer)
        expected = DataFrame({"A": a_values, "B": b_labels}).set_index("B")
        tm.assert_frame_equal(result, expected, check_index_type=True)

    # reindexing
    # convert to a regular index
    check(["a", "b", "e"], [0, 1, np.nan], Series(list("abe")))
    check(["a", "b"], [0, 1], Series(list("ab")))
    check(["e"], [np.nan], Series(["e"]))
    check(["d"], [np.nan], Series(["d"]))

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    check(
        Categorical(["a", "e"], categories=cats),
        [0, np.nan],
        Series(list("ae")).astype(CDT(cats)),
    )
    check(
        Categorical(["a"], categories=cats),
        [0],
        Series(list("a")).astype(CDT(cats)),
    )

    # give back the type of categorical that we received
    check(
        Categorical(["a", "e"], categories=cats, ordered=True),
        [0, np.nan],
        Series(list("ae")).astype(CDT(cats, ordered=True)),
    )
    check(
        Categorical(["a", "d"], categories=["a", "d"]),
        [0, np.nan],
        Series(list("ad")).astype(CDT(["a", "d"])),
    )

    df2 = DataFrame(
        {
            "A": np.arange(6, dtype="int64"),
        },
        index=CategoricalIndex(list("aabbca"), dtype=CDT(cats), name="B"),
    )
    # passed duplicate indexers are not allowed
    msg = "cannot reindex from a duplicate axis"
    with pytest.raises(ValueError, match=msg):
        df2.reindex(["a", "b"])

    # args NotImplemented ATM
    msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
    for arg_name, arg_value in (("method", "ffill"), ("level", 1), ("limit", 2)):
        with pytest.raises(NotImplementedError, match=msg.format(arg_name)):
            df.reindex(["a"], **{arg_name: arg_value})

pandas/tests/frame/methods/test_sort_index.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pandas as pd
55
from pandas import (
66
CategoricalDtype,
7+
CategoricalIndex,
78
DataFrame,
89
Index,
910
IntervalIndex,
@@ -495,7 +496,7 @@ def test_sort_index_categorical_multiindex(self):
495496
columns=["a"],
496497
index=MultiIndex(
497498
levels=[
498-
pd.CategoricalIndex(
499+
CategoricalIndex(
499500
["c", "a", "b"],
500501
categories=["c", "a", "b"],
501502
ordered=True,
@@ -736,6 +737,34 @@ def test_sort_index_multilevel_repr_8017(self, gen, extra):
736737
result = result.sort_index(axis=1)
737738
tm.assert_frame_equal(result, expected)
738739

740+
@pytest.mark.parametrize(
    "categories",
    [
        pytest.param(["a", "b", "c"], id="str"),
        pytest.param(
            [pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)],
            id="pd.Interval",
        ),
    ],
)
def test_sort_index_with_categories(self, categories):
    """sort_index on an ordered CategoricalIndex follows the category order.

    The categories are reversed after construction, so sorting is expected
    to return the rows in the reversed order as well (GH#23452).
    """
    # GH#23452
    n_rows = len(categories)
    flipped = categories[::-1]

    original_index = CategoricalIndex(
        data=categories, categories=categories, ordered=True
    )
    df = DataFrame({"foo": range(n_rows)}, index=original_index)
    df.index = df.index.reorder_categories(df.index.categories[::-1])

    expected_index = CategoricalIndex(data=flipped, categories=flipped, ordered=True)
    expected = DataFrame({"foo": reversed(range(n_rows))}, index=expected_index)

    result = df.sort_index()
    tm.assert_frame_equal(result, expected)
767+
739768

740769
class TestDataFrameSortIndexKey:
741770
def test_sort_multi_index_key(self):

pandas/tests/frame/methods/test_sort_values.py

+89
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.errors import PerformanceWarning
7+
68
import pandas as pd
79
from pandas import Categorical, DataFrame, NaT, Timestamp, date_range
810
import pandas._testing as tm
@@ -711,3 +713,90 @@ def sorter(key):
711713
)
712714

713715
tm.assert_frame_equal(result, expected)
716+
717+
718+
@pytest.fixture
def df_none():
    """Six-row frame built without an explicit index.

    It holds the columns "outer", "inner", "A" and the tuple-labelled
    column ("B", 5).
    """
    columns = {
        "outer": ["a", "a", "a", "b", "b", "b"],
        "inner": [1, 2, 2, 2, 1, 1],
        "A": np.arange(6, 0, -1),
        ("B", 5): ["one", "one", "two", "two", "one", "one"],
    }
    return DataFrame(columns)
728+
729+
730+
@pytest.fixture(params=[["outer"], ["outer", "inner"]])
def df_idx(request, df_none):
    """Return ``df_none`` with the parametrized column(s) set as its index."""
    return df_none.set_index(request.param)
734+
735+
736+
@pytest.fixture(
    params=[
        "inner",  # a single index level
        ["outer"],  # a single index level, given as a list
        "A",  # a single column
        [("B", 5)],  # a single column, given as a list
        ["inner", "outer"],  # two index levels
        [("B", 5), "outer"],  # one column plus one index level
        ["A", ("B", 5)],  # two columns
        # NOTE(review): this entry repeats the two-index-level case above,
        # although it was labelled "two index levels and column" - confirm
        # whether a column name was meant to be included.
        ["inner", "outer"],
    ]
)
def sort_names(request):
    """Yield each ``by`` argument used by the sort_values tests."""
    return request.param
750+
751+
752+
@pytest.fixture(params=[True, False])
def ascending(request):
    """Yield both values of the ``ascending`` flag passed to sort_values."""
    return request.param
755+
756+
757+
class TestSortValuesLevelAsStr:
758+
def test_sort_index_level_and_column_label(
759+
self, df_none, df_idx, sort_names, ascending
760+
):
761+
# GH#14353
762+
763+
# Get index levels from df_idx
764+
levels = df_idx.index.names
765+
766+
# Compute expected by sorting on columns and the setting index
767+
expected = df_none.sort_values(
768+
by=sort_names, ascending=ascending, axis=0
769+
).set_index(levels)
770+
771+
# Compute result sorting on mix on columns and index levels
772+
result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0)
773+
774+
tm.assert_frame_equal(result, expected)
775+
776+
def test_sort_column_level_and_index_label(
777+
self, df_none, df_idx, sort_names, ascending
778+
):
779+
# GH#14353
780+
781+
# Get levels from df_idx
782+
levels = df_idx.index.names
783+
784+
# Compute expected by sorting on axis=0, setting index levels, and then
785+
# transposing. For some cases this will result in a frame with
786+
# multiple column levels
787+
expected = (
788+
df_none.sort_values(by=sort_names, ascending=ascending, axis=0)
789+
.set_index(levels)
790+
.T
791+
)
792+
793+
# Compute result by transposing and sorting on axis=1.
794+
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
795+
796+
if len(levels) > 1:
797+
# Accessing multi-level columns that are not lexsorted raises a
798+
# performance warning
799+
with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False):
800+
tm.assert_frame_equal(result, expected)
801+
else:
802+
tm.assert_frame_equal(result, expected)

pandas/tests/frame/test_constructors.py

+7
Original file line numberDiff line numberDiff line change
@@ -2699,6 +2699,13 @@ def test_frame_ctor_datetime64_column(self):
26992699

27002700

27012701
class TestDataFrameConstructorWithDatetimeTZ:
2702+
def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
    """A tz-aware Timestamp with a naive dtype should match the naive frame.

    The frame built from a tz-aware ``Timestamp("2019")`` with
    ``dtype="datetime64[ns]"`` is compared against the frame built from
    the naive ``Timestamp("2019")``.
    """
    # GH#25843
    aware_stamp = Timestamp("2019", tz=tz_aware_fixture)
    naive_stamp = Timestamp("2019")

    expected = DataFrame({"d": [naive_stamp]})
    result = DataFrame({"d": [aware_stamp]}, dtype="datetime64[ns]")

    tm.assert_frame_equal(result, expected)
2708+
27022709
def test_from_dict(self):
27032710

27042711
# 8260

pandas/tests/frame/test_join.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest
55

66
import pandas as pd
7-
from pandas import DataFrame, Index, MultiIndex, period_range
7+
from pandas import DataFrame, Index, MultiIndex, date_range, period_range
88
import pandas._testing as tm
99

1010

@@ -341,3 +341,24 @@ def test_merge_join_different_levels(self):
341341
with tm.assert_produces_warning(UserWarning):
342342
result = df1.join(df2, on="a")
343343
tm.assert_frame_equal(result, expected)
344+
345+
def test_frame_join_tzaware(self):
346+
test1 = DataFrame(
347+
np.zeros((6, 3)),
348+
index=date_range(
349+
"2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central"
350+
),
351+
)
352+
test2 = DataFrame(
353+
np.zeros((3, 3)),
354+
index=date_range(
355+
"2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"
356+
),
357+
columns=range(3, 6),
358+
)
359+
360+
result = test1.join(test2, how="outer")
361+
expected = test1.index.union(test2.index)
362+
363+
tm.assert_index_equal(result.index, expected)
364+
assert result.index.tz.zone == "US/Central"

0 commit comments

Comments
 (0)