Skip to content

Commit 6646f7c

Browse files
jbrockmendel authored and znicholls committed
TST/REF: split/collect large tests (pandas-dev#39789)
1 parent 5092a07 commit 6646f7c

File tree

8 files changed

+291
-245
lines changed

8 files changed

+291
-245
lines changed

pandas/tests/frame/indexing/test_getitem.py

+82
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
MultiIndex,
1111
Series,
1212
Timestamp,
13+
concat,
1314
get_dummies,
1415
period_range,
1516
)
@@ -176,6 +177,87 @@ def test_getitem_bool_mask_categorical_index(self):
176177
with pytest.raises(TypeError, match=msg):
177178
df4[df4.index > 1]
178179

180+
@pytest.mark.parametrize(
181+
"data1,data2,expected_data",
182+
(
183+
(
184+
[[1, 2], [3, 4]],
185+
[[0.5, 6], [7, 8]],
186+
[[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
187+
),
188+
(
189+
[[1, 2], [3, 4]],
190+
[[5, 6], [7, 8]],
191+
[[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
192+
),
193+
),
194+
)
195+
def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
196+
self,
197+
data1,
198+
data2,
199+
expected_data,
200+
):
201+
# GH#31954
202+
203+
df1 = DataFrame(np.array(data1))
204+
df2 = DataFrame(np.array(data2))
205+
df = concat([df1, df2], axis=1)
206+
207+
result = df[df > 2]
208+
209+
exdict = {i: np.array(col) for i, col in enumerate(expected_data)}
210+
expected = DataFrame(exdict).rename(columns={2: 0, 3: 1})
211+
tm.assert_frame_equal(result, expected)
212+
213+
@pytest.fixture
214+
def df_dup_cols(self):
215+
dups = ["A", "A", "C", "D"]
216+
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
217+
return df
218+
219+
def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
220+
# `df.A > 6` is a DataFrame with a different shape from df
221+
222+
# boolean with the duplicate raises
223+
df = df_dup_cols
224+
msg = "cannot reindex from a duplicate axis"
225+
with pytest.raises(ValueError, match=msg):
226+
df[df.A > 6]
227+
228+
def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
229+
# boolean indexing
230+
# GH#4879
231+
df = DataFrame(
232+
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
233+
)
234+
expected = df[df.C > 6]
235+
expected.columns = df_dup_cols.columns
236+
237+
df = df_dup_cols
238+
result = df[df.C > 6]
239+
240+
tm.assert_frame_equal(result, expected)
241+
result.dtypes
242+
str(result)
243+
244+
def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
245+
246+
# where
247+
df = DataFrame(
248+
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
249+
)
250+
# `df > 6` is a DataFrame with the same shape+alignment as df
251+
expected = df[df > 6]
252+
expected.columns = df_dup_cols.columns
253+
254+
df = df_dup_cols
255+
result = df[df > 6]
256+
257+
tm.assert_frame_equal(result, expected)
258+
result.dtypes
259+
str(result)
260+
179261

180262
class TestGetitemSlice:
181263
def test_getitem_slice_float64(self, frame_or_series):

pandas/tests/frame/indexing/test_setitem.py

+92
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,26 @@
22
import pytest
33

44
from pandas.core.dtypes.base import registry as ea_registry
5+
from pandas.core.dtypes.common import (
6+
is_categorical_dtype,
7+
is_interval_dtype,
8+
is_object_dtype,
9+
)
510
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
611

712
from pandas import (
813
Categorical,
914
DataFrame,
15+
DatetimeIndex,
1016
Index,
1117
Interval,
18+
IntervalIndex,
1219
NaT,
1320
Period,
1421
PeriodIndex,
1522
Series,
1623
Timestamp,
24+
cut,
1725
date_range,
1826
notna,
1927
period_range,
@@ -395,6 +403,90 @@ def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
395403
with pytest.raises(ValueError, match=msg):
396404
df[["a", "b"]] = rhs
397405

406+
def test_setitem_intervals(self):
407+
408+
df = DataFrame({"A": range(10)})
409+
ser = cut(df["A"], 5)
410+
assert isinstance(ser.cat.categories, IntervalIndex)
411+
412+
# B & D end up as Categoricals
413+
# the remainder are converted to in-line objects
414+
# containing an IntervalIndex.values
415+
df["B"] = ser
416+
df["C"] = np.array(ser)
417+
df["D"] = ser.values
418+
df["E"] = np.array(ser.values)
419+
420+
assert is_categorical_dtype(df["B"].dtype)
421+
assert is_interval_dtype(df["B"].cat.categories)
422+
assert is_categorical_dtype(df["D"].dtype)
423+
assert is_interval_dtype(df["D"].cat.categories)
424+
425+
assert is_object_dtype(df["C"])
426+
assert is_object_dtype(df["E"])
427+
428+
# they compare equal as Index
429+
# when converted to numpy objects
430+
c = lambda x: Index(np.array(x))
431+
tm.assert_index_equal(c(df.B), c(df.B))
432+
tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
433+
tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
434+
tm.assert_index_equal(c(df.C), c(df.D), check_names=False)
435+
436+
# B & D are the same Series
437+
tm.assert_series_equal(df["B"], df["B"])
438+
tm.assert_series_equal(df["B"], df["D"], check_names=False)
439+
440+
# C & E are the same Series
441+
tm.assert_series_equal(df["C"], df["C"])
442+
tm.assert_series_equal(df["C"], df["E"], check_names=False)
443+
444+
445+
class TestSetitemTZAwareValues:
446+
@pytest.fixture
447+
def idx(self):
448+
naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
449+
idx = naive.tz_localize("US/Pacific")
450+
return idx
451+
452+
@pytest.fixture
453+
def expected(self, idx):
454+
expected = Series(np.array(idx.tolist(), dtype="object"), name="B")
455+
assert expected.dtype == idx.dtype
456+
return expected
457+
458+
def test_setitem_dt64series(self, idx, expected):
459+
# convert to utc
460+
df = DataFrame(np.random.randn(2, 1), columns=["A"])
461+
df["B"] = idx
462+
463+
with tm.assert_produces_warning(FutureWarning) as m:
464+
df["B"] = idx.to_series(keep_tz=False, index=[0, 1])
465+
msg = "do 'idx.tz_convert(None)' before calling"
466+
assert msg in str(m[0].message)
467+
468+
result = df["B"]
469+
comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B")
470+
tm.assert_series_equal(result, comp)
471+
472+
def test_setitem_datetimeindex(self, idx, expected):
473+
# setting a DataFrame column with a tzaware DTI retains the dtype
474+
df = DataFrame(np.random.randn(2, 1), columns=["A"])
475+
476+
# assign to frame
477+
df["B"] = idx
478+
result = df["B"]
479+
tm.assert_series_equal(result, expected)
480+
481+
def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):
482+
# setting a DataFrame column with an object array of tzaware datetimes gives the tzaware dtype
483+
df = DataFrame(np.random.randn(2, 1), columns=["A"])
484+
485+
# object array of datetimes with a tz
486+
df["B"] = idx.to_pydatetime()
487+
result = df["B"]
488+
tm.assert_series_equal(result, expected)
489+
398490

399491
class TestDataFrameSetItemWithExpansion:
400492
def test_setitem_listlike_views(self):

pandas/tests/frame/methods/test_reindex.py

+37
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,43 @@
2121
import pandas.core.common as com
2222

2323

24+
class TestReindexSetIndex:
25+
# Tests that check both reindex and set_index
26+
27+
def test_dti_set_index_reindex_datetimeindex(self):
28+
# GH#6631
29+
df = DataFrame(np.random.random(6))
30+
idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern")
31+
idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo")
32+
33+
df = df.set_index(idx1)
34+
tm.assert_index_equal(df.index, idx1)
35+
df = df.reindex(idx2)
36+
tm.assert_index_equal(df.index, idx2)
37+
38+
def test_dti_set_index_reindex_freq_with_tz(self):
39+
# GH#11314 with tz
40+
index = date_range(
41+
datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern"
42+
)
43+
df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index)
44+
new_index = date_range(
45+
datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern"
46+
)
47+
48+
result = df.set_index(new_index)
49+
assert result.index.freq == index.freq
50+
51+
def test_set_reset_index_intervalindex(self):
52+
53+
df = DataFrame({"A": range(10)})
54+
ser = pd.cut(df.A, 5)
55+
df["B"] = ser
56+
df = df.set_index("B")
57+
58+
df = df.reset_index()
59+
60+
2461
class TestDataFrameSelectReindex:
2562
# These are specific reindex-based tests; other indexing tests should go in
2663
# test_indexing

pandas/tests/frame/methods/test_set_index.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
See also: test_reindex.py:TestReindexSetIndex
3+
"""
4+
15
from datetime import datetime, timedelta
26

37
import numpy as np

0 commit comments

Comments
 (0)