Skip to content

TST/REF: split/collect large tests #39789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions pandas/tests/frame/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
MultiIndex,
Series,
Timestamp,
concat,
get_dummies,
period_range,
)
Expand Down Expand Up @@ -176,6 +177,87 @@ def test_getitem_bool_mask_categorical_index(self):
with pytest.raises(TypeError, match=msg):
df4[df4.index > 1]

@pytest.mark.parametrize(
    "data1,data2,expected_data",
    [
        # second frame contains a float (0.5)
        (
            [[1, 2], [3, 4]],
            [[0.5, 6], [7, 8]],
            [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
        ),
        # both frames hold only integers
        (
            [[1, 2], [3, 4]],
            [[5, 6], [7, 8]],
            [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
        ),
    ],
)
def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
    self, data1, data2, expected_data
):
    # GH#31954
    # Mask a frame whose column labels repeat (0, 1, 0, 1) after concatenating
    # two 2x2 frames side by side; `expected_data` lists the expected values
    # one column at a time.
    left = DataFrame(np.array(data1))
    right = DataFrame(np.array(data2))
    frame = concat([left, right], axis=1)

    result = frame[frame > 2]

    # Build the expected frame under unique positional labels first, then
    # rename positions 2 and 3 so the labels repeat like those of `frame`.
    by_position = {}
    for position, column_values in enumerate(expected_data):
        by_position[position] = np.array(column_values)
    expected = DataFrame(by_position)
    expected = expected.rename(columns={2: 0, 3: 1})

    tm.assert_frame_equal(result, expected)

@pytest.fixture
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, can you put fixtures at the top of the class?

def df_dup_cols(self):
    # 3x4 float64 frame holding 0..11 whose first two column labels are both "A"
    labels = ["A", "A", "C", "D"]
    values = np.arange(12).reshape(3, 4)
    return DataFrame(values, columns=labels, dtype="float64")

def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
    # `df.A > 6` is a DataFrame with a different shape from df,
    # because the label "A" occurs twice in the fixture's columns
    frame = df_dup_cols

    # boolean with the duplicate raises
    with pytest.raises(ValueError, match="cannot reindex from a duplicate axis"):
        frame[frame.A > 6]

def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
# boolean indexing
# GH#4879
df = DataFrame(
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
)
expected = df[df.C > 6]
expected.columns = df_dup_cols.columns

df = df_dup_cols
result = df[df.C > 6]

tm.assert_frame_equal(result, expected)
result.dtypes
str(result)

def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):

# where
df = DataFrame(
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
)
# `df > 6` is a DataFrame with the same shape+alignment as df
expected = df[df > 6]
expected.columns = df_dup_cols.columns

df = df_dup_cols
result = df[df > 6]

tm.assert_frame_equal(result, expected)
result.dtypes
str(result)


class TestGetitemSlice:
def test_getitem_slice_float64(self, frame_or_series):
Expand Down
92 changes: 92 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,26 @@
import pytest

from pandas.core.dtypes.base import registry as ea_registry
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_interval_dtype,
is_object_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype

from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
Index,
Interval,
IntervalIndex,
NaT,
Period,
PeriodIndex,
Series,
Timestamp,
cut,
date_range,
notna,
period_range,
Expand Down Expand Up @@ -395,6 +403,90 @@ def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
with pytest.raises(ValueError, match=msg):
df[["a", "b"]] = rhs

def test_setitem_intervals(self):
    frame = DataFrame({"A": range(10)})
    binned = cut(frame["A"], 5)
    assert isinstance(binned.cat.categories, IntervalIndex)

    # B & D end up as Categoricals
    # the remainder are converted to in-line objects
    # containing an IntervalIndex.values
    frame["B"] = binned
    frame["C"] = np.array(binned)
    frame["D"] = binned.values
    frame["E"] = np.array(binned.values)

    for label in ("B", "D"):
        assert is_categorical_dtype(frame[label].dtype)
        assert is_interval_dtype(frame[label].cat.categories)

    for label in ("C", "E"):
        assert is_object_dtype(frame[label])

    def as_index(column):
        # view a column as an Index built from its numpy representation
        return Index(np.array(column))

    # they compare equal as Index
    # when converted to numpy objects
    tm.assert_index_equal(as_index(frame.B), as_index(frame.B))
    for left, right in (("B", "C"), ("B", "D"), ("C", "D")):
        tm.assert_index_equal(
            as_index(frame[left]), as_index(frame[right]), check_names=False
        )

    # B & D are the same Series
    tm.assert_series_equal(frame["B"], frame["B"])
    tm.assert_series_equal(frame["B"], frame["D"], check_names=False)

    # C & E are the same Series
    tm.assert_series_equal(frame["C"], frame["C"])
    tm.assert_series_equal(frame["C"], frame["E"], check_names=False)


class TestSetitemTZAwareValues:
    # Assigning timezone-aware datetime values to a DataFrame column

    @pytest.fixture
    def idx(self):
        # two wall-clock times localized to US/Pacific; the index is named "B"
        wall_times = ["2013-1-1 13:00", "2013-1-2 14:00"]
        return DatetimeIndex(wall_times, name="B").tz_localize("US/Pacific")

    @pytest.fixture
    def expected(self, idx):
        # Series built from an object array of the elements of `idx`
        stamps = np.array(idx.tolist(), dtype="object")
        ser = Series(stamps, name="B")
        # sanity check: the Series must end up with the same dtype as `idx`
        assert ser.dtype == idx.dtype
        return ser

    def test_setitem_dt64series(self, idx, expected):
        # convert to utc
        frame = DataFrame(np.random.randn(2, 1), columns=["A"])
        frame["B"] = idx

        with tm.assert_produces_warning(FutureWarning) as record:
            frame["B"] = idx.to_series(keep_tz=False, index=[0, 1])
        fragment = "do 'idx.tz_convert(None)' before calling"
        assert fragment in str(record[0].message)

        # the stored column should hold the tz-naive UTC equivalents
        naive_utc = idx.tz_convert("UTC").tz_localize(None)
        tm.assert_series_equal(frame["B"], Series(naive_utc, name="B"))

    def test_setitem_datetimeindex(self, idx, expected):
        # setting a DataFrame column with a tzaware DTI retains the dtype
        frame = DataFrame(np.random.randn(2, 1), columns=["A"])

        # assign to frame
        frame["B"] = idx
        tm.assert_series_equal(frame["B"], expected)

    def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):
        # setting a DataFrame column with a tzaware DTI retains the dtype
        frame = DataFrame(np.random.randn(2, 1), columns=["A"])

        # object array of datetimes with a tz
        py_datetimes = idx.to_pydatetime()
        frame["B"] = py_datetimes
        tm.assert_series_equal(frame["B"], expected)


class TestDataFrameSetItemWithExpansion:
def test_setitem_listlike_views(self):
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,43 @@
import pandas.core.common as com


class TestReindexSetIndex:
    # Tests that check both reindex and set_index

    def test_dti_set_index_reindex_datetimeindex(self):
        # GH#6631
        frame = DataFrame(np.random.random(6))
        eastern = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern")
        tokyo = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo")

        # each operation must leave exactly the index it was given
        indexed = frame.set_index(eastern)
        tm.assert_index_equal(indexed.index, eastern)

        reindexed = indexed.reindex(tokyo)
        tm.assert_index_equal(reindexed.index, tokyo)

    def test_dti_set_index_reindex_freq_with_tz(self):
        # GH#11314 with tz
        def hourly(day):
            # the 24 hourly US/Eastern stamps of the given day in October 2015
            return date_range(
                datetime(2015, 10, day),
                datetime(2015, 10, day, 23),
                freq="H",
                tz="US/Eastern",
            )

        index = hourly(1)
        frame = DataFrame(np.random.randn(24, 1), columns=["a"], index=index)

        result = frame.set_index(hourly(2))
        assert result.index.freq == index.freq

    def test_set_reset_index_intervalindex(self):
        # smoke test: round-trip an interval-binned column through the index
        frame = DataFrame({"A": range(10)})
        frame["B"] = pd.cut(frame.A, 5)

        indexed = frame.set_index("B")
        indexed.reset_index()


class TestDataFrameSelectReindex:
# These are specific reindex-based tests; other indexing tests should go in
# test_indexing
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
See also: test_reindex.py:TestReindexSetIndex
"""

from datetime import datetime, timedelta

import numpy as np
Expand Down
Loading