Skip to content

TST: collect tests by method #37300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion pandas/tests/frame/methods/test_count.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from pandas import DataFrame, Series
import numpy as np
import pytest

from pandas import DataFrame, Index, Series
import pandas._testing as tm


Expand Down Expand Up @@ -34,3 +37,85 @@ def test_count_objects(self, float_string_frame):

tm.assert_series_equal(dm.count(), df.count())
tm.assert_series_equal(dm.count(1), df.count(1))

def test_count_level_corner(self, multiindex_dataframe_random_data):
    # Corner case: count(level=0) on zero-length slices is expected to
    # return one zero entry per label of the first index level.
    frame = multiindex_dataframe_random_data

    # Series
    empty_ser = frame["A"][:0]
    first_level = empty_ser.index.levels[0]
    ser_result = empty_ser.count(level=0)
    ser_expected = Series(0, index=first_level, name="A")
    tm.assert_series_equal(ser_result, ser_expected)

    # DataFrame
    empty_df = frame[:0]
    df_result = empty_df.count(level=0)
    blank = DataFrame(
        index=first_level.set_names(["first"]),
        columns=empty_df.columns,
    )
    df_expected = blank.fillna(0).astype(np.int64)
    tm.assert_frame_equal(df_result, df_expected)

def test_count_index_with_nan(self):
    # https://github.com/pandas-dev/pandas/issues/21824
    # One "Person" label is missing; only the two real labels are expected
    # in the counted result.
    raw = {
        "Person": ["John", "Myla", None, "John", "Myla"],
        "Age": [24.0, 5, 21.0, 33, 26],
        "Single": [False, True, True, True, False],
    }
    indexed = DataFrame(raw).set_index(["Person", "Single"])
    people = Index(["John", "Myla"], name="Person")

    # count on row labels
    by_rows = indexed.count(level="Person")
    rows_expected = DataFrame(index=people, columns=Index(["Age"]), data=[2, 2])
    tm.assert_frame_equal(by_rows, rows_expected)

    # count on column labels
    by_cols = indexed.T.count(level="Person", axis=1)
    cols_expected = DataFrame(
        columns=people,
        index=Index(["Age"]),
        data=[[2, 2]],
    )
    tm.assert_frame_equal(by_cols, cols_expected)

def test_count_level(
    self,
    multiindex_year_month_day_dataframe_random_data,
    multiindex_dataframe_random_data,
):
    ymd = multiindex_year_month_day_dataframe_random_data
    frame = multiindex_dataframe_random_data

    def _check_counts(obj, axis=0):
        # count(level=i) must agree with groupby(level=i).count(), once the
        # groupby result is aligned to it and cast to int64
        n_levels = obj._get_axis(axis).nlevels
        for lvl in range(n_levels):
            counted = obj.count(axis=axis, level=lvl)
            grouped = obj.groupby(axis=axis, level=lvl).count()
            grouped = grouped.reindex_like(counted).astype("i8")
            tm.assert_frame_equal(counted, grouped)

    # punch a few holes into both fixtures so the counts are not trivial
    frame.iloc[1, [1, 2]] = np.nan
    frame.iloc[7, [0, 1]] = np.nan
    ymd.iloc[1, [1, 2]] = np.nan
    ymd.iloc[7, [0, 1]] = np.nan

    for obj, axis in ((frame, 0), (ymd, 0), (frame.T, 1), (ymd.T, 1)):
        _check_counts(obj, axis=axis)

    # can't call with level on regular DataFrame
    flat = tm.makeTimeDataFrame()
    with pytest.raises(TypeError, match="hierarchical"):
        flat.count(level=0)

    # after adding a string column, numeric_only=True is expected to leave
    # only the A/B/C columns in the result
    frame["D"] = "foo"
    numeric_counts = frame.count(level=0, numeric_only=True)
    abc = Index(list("ABC"), name="exp")
    tm.assert_index_equal(numeric_counts.columns, abc)
29 changes: 28 additions & 1 deletion pandas/tests/frame/methods/test_pop.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from pandas import DataFrame, Series
import numpy as np

from pandas import DataFrame, MultiIndex, Series
import pandas._testing as tm


Expand Down Expand Up @@ -38,3 +40,28 @@ def test_pop_non_unique_cols(self):
assert "b" in df.columns
assert "a" not in df.columns
assert len(df.index) == 2

def test_mixed_depth_pop(self):
    # Columns form a mixed-depth MultiIndex: shallower keys are padded with
    # "" on the lower levels. Popping by the short key must match popping
    # by the fully padded tuple.
    arrays = [
        ["a", "top", "top", "routine1", "routine1", "routine2"],
        ["", "OD", "OD", "result1", "result2", "result1"],
        ["", "wx", "wy", "", "", ""],
    ]

    tuples = sorted(zip(*arrays))
    index = MultiIndex.from_tuples(tuples)
    df = DataFrame(np.random.randn(4, 6), columns=index)

    df1 = df.copy()
    df2 = df.copy()
    result = df1.pop("a")
    expected = df2.pop(("a", "", ""))
    # Names are not compared here; the name of the short-key result is
    # asserted separately below.
    tm.assert_series_equal(result, expected, check_names=False)
    tm.assert_frame_equal(df1, df2)
    assert result.name == "a"

    # Popping a top-level key that spans several columns: the popped value
    # must equal plain selection, and the remainder must equal drop().
    expected = df1["top"]
    df1 = df1.drop(["top"], axis=1)
    result = df2.pop("top")
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(df1, df2)
117 changes: 116 additions & 1 deletion pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@
import numpy as np
import pytest

from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series, date_range
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm


Expand Down Expand Up @@ -352,6 +361,112 @@ def test_construction_with_categorical_index(self):
idf = idf.reset_index().set_index("B")
tm.assert_index_equal(idf.index, ci)

def test_set_index_datetime(self):
    # GH#3950
    hours = ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"]
    df = DataFrame(
        {
            "label": ["a", "a", "a", "b", "b", "b"],
            "datetime": hours * 2,
            "value": range(6),
        }
    )
    # Two separate assignments: the first installs the popped column as the
    # index, the second converts that index to another timezone.
    df.index = to_datetime(df.pop("datetime"), utc=True)
    df.index = df.index.tz_convert("US/Pacific")

    expected = (
        DatetimeIndex(hours, name="datetime")
        .tz_localize("UTC")
        .tz_convert("US/Pacific")
    )
    labels = Index(["a", "b"], name="label")

    df = df.set_index("label", append=True)
    tm.assert_index_equal(df.index.levels[0], expected)
    tm.assert_index_equal(df.index.levels[1], labels)
    assert df.index.names == ["datetime", "label"]

    df = df.swaplevel(0, 1)
    tm.assert_index_equal(df.index.levels[0], labels)
    tm.assert_index_equal(df.index.levels[1], expected)
    assert df.index.names == ["label", "datetime"]

    # Three tz-aware indexes stacked into one MultiIndex
    days = ["2012-04-01 09:00", "2012-04-02 09:00"]
    df = DataFrame(np.random.random(6))
    idx1 = DatetimeIndex(hours * 2, tz="US/Eastern")
    idx2 = DatetimeIndex([day for day in days for _ in range(3)], tz="US/Eastern")
    idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo")
    # NOTE(review): freq is dropped presumably so idx3 compares equal to the
    # level held by the MultiIndex -- confirm
    idx3 = idx3._with_freq(None)

    df = (
        df.set_index(idx1)
        .set_index(idx2, append=True)
        .set_index(idx3, append=True)
    )

    expected1 = DatetimeIndex(hours, tz="US/Eastern")
    expected2 = DatetimeIndex(days, tz="US/Eastern")

    # each level holds the unique values ...
    for position, level_expected in enumerate((expected1, expected2, idx3)):
        tm.assert_index_equal(df.index.levels[position], level_expected)

    # GH#7092
    # ... while get_level_values returns the original, full-length indexes
    for position, values_expected in enumerate((idx1, idx2, idx3)):
        tm.assert_index_equal(
            df.index.get_level_values(position), values_expected
        )

def test_set_index_period(self):
    # GH#6631
    df = DataFrame(np.random.random(6))

    monthly = period_range("2011-01-01", periods=3, freq="M")
    hourly = period_range("2013-01-01 09:00", periods=2, freq="H")

    # repeat the short ranges until each index has six entries
    idx1 = monthly.append(monthly)
    idx2 = hourly.append(hourly).append(hourly)
    idx3 = period_range("2005", periods=6, freq="A")

    df = (
        df.set_index(idx1)
        .set_index(idx2, append=True)
        .set_index(idx3, append=True)
    )

    expected1 = period_range("2011-01-01", periods=3, freq="M")
    expected2 = period_range("2013-01-01 09:00", periods=2, freq="H")

    # levels hold the unique periods ...
    for position, level_expected in enumerate((expected1, expected2, idx3)):
        tm.assert_index_equal(df.index.levels[position], level_expected)

    # ... and get_level_values returns the full-length inputs
    for position, values_expected in enumerate((idx1, idx2, idx3)):
        tm.assert_index_equal(
            df.index.get_level_values(position), values_expected
        )


class TestSetIndexInvalid:
def test_set_index_verify_integrity(self, frame_of_index_cols):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/frame/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,12 @@ def test_sort_index_level_mixed(self):
sorted_after.drop([("foo", "three")], axis=1),
)

def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data):
    # sorting the index must leave the index level names unchanged
    frame = multiindex_dataframe_random_data

    sorted_frame = frame.sort_index()
    names_after = sorted_frame.index.names
    assert names_after == frame.index.names


class TestDataFrameSortIndexKey:
def test_sort_multi_index_key(self):
Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1195,6 +1195,40 @@ def test_preserve_timezone(self, initial: str, method):
result = getattr(df, method)(axis=1)
tm.assert_series_equal(result, expected)

def test_frame_any_all_with_level(self):
    # any/all reduced per label of the outer index level
    outer = ["one", "one", "two", "one", "two", "two", "two"]
    inner = [0, 1, 0, 2, 1, 2, 3]
    df = DataFrame(
        {"data": [False, False, True, False, True, False, True]},
        index=[outer, inner],
    )

    # "one" holds only False; "two" holds a mix of True and False
    cases = (
        (df.any, [False, True]),
        (df.all, [False, False]),
    )
    for reduce_by_level, flags in cases:
        result = reduce_by_level(level=0)
        ex = DataFrame({"data": flags}, index=["one", "two"])
        tm.assert_frame_equal(result, ex)

def test_frame_any_with_timedelta(self):
    # GH#17667
    # Column "a" is all zeros; column "t" holds a zero and a non-zero
    # timedelta.
    zero_td = pd.to_timedelta(0, "s")
    one_ms = pd.to_timedelta(1, "ms")
    df = DataFrame({"a": Series([0, 0]), "t": Series([zero_td, one_ms])})

    # column-wise reduction
    by_column = df.any(axis=0)
    tm.assert_series_equal(
        by_column, Series(data=[False, True], index=["a", "t"])
    )

    # row-wise reduction
    by_row = df.any(axis=1)
    tm.assert_series_equal(by_row, Series(data=[False, True]))


def test_mixed_frame_with_integer_sum():
# https://github.com/pandas-dev/pandas/issues/34520
Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest

from pandas import MultiIndex, NaT, Series, date_range
from pandas import MultiIndex, NaT, Series, date_range, period_range
import pandas.testing as tm


Expand All @@ -26,3 +27,15 @@ def test_setitem_multiindex_empty_slice(self):
expected = result.copy()
result.loc[[]] = 0
tm.assert_series_equal(result, expected)


class TestSetitemPeriodDtype:
    @pytest.mark.parametrize("na_val", [None, np.nan])
    def test_setitem_na_period_dtype_casts_to_nat(self, na_val):
        # Assigning an NA-like value into a period-dtype Series must store
        # NaT, both for a scalar key and for a slice.
        daily = period_range("2000-01-01", periods=10, freq="D")
        ser = Series(daily)

        # scalar key
        ser[3] = na_val
        stored = ser[3]
        assert stored is NaT

        # slice key
        ser[3:5] = na_val
        stored = ser[4]
        assert stored is NaT
9 changes: 8 additions & 1 deletion pandas/tests/series/methods/test_repeat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas import Series
from pandas import MultiIndex, Series
import pandas._testing as tm


Expand All @@ -28,3 +28,10 @@ def test_numpy_repeat(self):
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.repeat(ser, 2, axis=0)

def test_repeat_with_multiindex(self):
    # GH#9361, fixed by GH#7891
    # Repeating a Series with a MultiIndex must repeat each value together
    # with its index entry, keeping the original order.
    tuples = [(1, 2), (3, 4), (5, 6), (7, 8)]
    data = ["a", "b", "c", "d"]
    ser = Series(data, index=MultiIndex.from_tuples(tuples))

    result = ser.repeat(3)

    # original check: the length is tripled
    assert result.shape == (3 * len(data),)

    # also pin the values and the index, which a shape check cannot catch
    expected = Series(
        [val for val in data for _ in range(3)],
        index=MultiIndex.from_tuples([tup for tup in tuples for _ in range(3)]),
    )
    tm.assert_series_equal(result, expected)
15 changes: 0 additions & 15 deletions pandas/tests/series/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,6 @@ def test_NaT_cast(self):
expected = Series([pd.NaT], dtype="period[D]")
tm.assert_series_equal(result, expected)

def test_set_none(self):
    # None assigned through a scalar key or a slice must be stored as NaT
    ser = self.series

    ser[3] = None
    assert ser[3] is pd.NaT

    ser[3:5] = None
    assert ser[4] is pd.NaT

def test_set_nan(self):
    # Do we want to allow this?
    # np.nan assigned through a scalar key or a slice must be stored as NaT
    ser = self.series

    ser[5] = np.nan
    assert ser[5] is pd.NaT

    ser[5:7] = np.nan
    assert ser[6] is pd.NaT

def test_intercept_astype_object(self):
expected = self.series.astype("object")

Expand Down
Loading