Skip to content

Commit 0eb010e

Browse files
jbrockmendel authored and jreback committed
REF: method-specific test files for Series/DataFrame (#30414)
1 parent 0f466dc commit 0eb010e

12 files changed

+992
-933
lines changed
+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import DataFrame, Series, Timestamp, date_range
6+
import pandas.util.testing as tm
7+
8+
9+
class TestDataFrameDiff:
10+
def test_diff(self, datetime_frame):
11+
the_diff = datetime_frame.diff(1)
12+
13+
tm.assert_series_equal(
14+
the_diff["A"], datetime_frame["A"] - datetime_frame["A"].shift(1)
15+
)
16+
17+
# int dtype
18+
a = 10000000000000000
19+
b = a + 1
20+
s = Series([a, b])
21+
22+
rs = DataFrame({"s": s}).diff()
23+
assert rs.s[1] == 1
24+
25+
# mixed numeric
26+
tf = datetime_frame.astype("float32")
27+
the_diff = tf.diff(1)
28+
tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1))
29+
30+
# GH#10907
31+
df = pd.DataFrame({"y": pd.Series([2]), "z": pd.Series([3])})
32+
df.insert(0, "x", 1)
33+
result = df.diff(axis=1)
34+
expected = pd.DataFrame(
35+
{"x": np.nan, "y": pd.Series(1), "z": pd.Series(1)}
36+
).astype("float64")
37+
tm.assert_frame_equal(result, expected)
38+
39+
@pytest.mark.parametrize("tz", [None, "UTC"])
40+
def test_diff_datetime_axis0(self, tz):
41+
# GH#18578
42+
df = DataFrame(
43+
{
44+
0: date_range("2010", freq="D", periods=2, tz=tz),
45+
1: date_range("2010", freq="D", periods=2, tz=tz),
46+
}
47+
)
48+
49+
result = df.diff(axis=0)
50+
expected = DataFrame(
51+
{
52+
0: pd.TimedeltaIndex(["NaT", "1 days"]),
53+
1: pd.TimedeltaIndex(["NaT", "1 days"]),
54+
}
55+
)
56+
tm.assert_frame_equal(result, expected)
57+
58+
@pytest.mark.parametrize("tz", [None, "UTC"])
59+
def test_diff_datetime_axis1(self, tz):
60+
# GH#18578
61+
df = DataFrame(
62+
{
63+
0: date_range("2010", freq="D", periods=2, tz=tz),
64+
1: date_range("2010", freq="D", periods=2, tz=tz),
65+
}
66+
)
67+
if tz is None:
68+
result = df.diff(axis=1)
69+
expected = DataFrame(
70+
{
71+
0: pd.TimedeltaIndex(["NaT", "NaT"]),
72+
1: pd.TimedeltaIndex(["0 days", "0 days"]),
73+
}
74+
)
75+
tm.assert_frame_equal(result, expected)
76+
else:
77+
with pytest.raises(NotImplementedError):
78+
result = df.diff(axis=1)
79+
80+
def test_diff_timedelta(self):
81+
# GH#4533
82+
df = DataFrame(
83+
dict(
84+
time=[Timestamp("20130101 9:01"), Timestamp("20130101 9:02")],
85+
value=[1.0, 2.0],
86+
)
87+
)
88+
89+
res = df.diff()
90+
exp = DataFrame(
91+
[[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"]
92+
)
93+
tm.assert_frame_equal(res, exp)
94+
95+
def test_diff_mixed_dtype(self):
96+
df = DataFrame(np.random.randn(5, 3))
97+
df["A"] = np.array([1, 2, 3, 4, 5], dtype=object)
98+
99+
result = df.diff()
100+
assert result[0].dtype == np.float64
101+
102+
def test_diff_neg_n(self, datetime_frame):
103+
rs = datetime_frame.diff(-1)
104+
xp = datetime_frame - datetime_frame.shift(-1)
105+
tm.assert_frame_equal(rs, xp)
106+
107+
def test_diff_float_n(self, datetime_frame):
108+
rs = datetime_frame.diff(1.0)
109+
xp = datetime_frame.diff(1)
110+
tm.assert_frame_equal(rs, xp)
111+
112+
def test_diff_axis(self):
113+
# GH#9727
114+
df = DataFrame([[1.0, 2.0], [3.0, 4.0]])
115+
tm.assert_frame_equal(
116+
df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]])
117+
)
118+
tm.assert_frame_equal(
119+
df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])
120+
)
+187
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import DataFrame, Index, Series, date_range, offsets
6+
import pandas.util.testing as tm
7+
8+
9+
class TestDataFrameShift:
10+
def test_shift(self, datetime_frame, int_frame):
11+
# naive shift
12+
shiftedFrame = datetime_frame.shift(5)
13+
tm.assert_index_equal(shiftedFrame.index, datetime_frame.index)
14+
15+
shiftedSeries = datetime_frame["A"].shift(5)
16+
tm.assert_series_equal(shiftedFrame["A"], shiftedSeries)
17+
18+
shiftedFrame = datetime_frame.shift(-5)
19+
tm.assert_index_equal(shiftedFrame.index, datetime_frame.index)
20+
21+
shiftedSeries = datetime_frame["A"].shift(-5)
22+
tm.assert_series_equal(shiftedFrame["A"], shiftedSeries)
23+
24+
# shift by 0
25+
unshifted = datetime_frame.shift(0)
26+
tm.assert_frame_equal(unshifted, datetime_frame)
27+
28+
# shift by DateOffset
29+
shiftedFrame = datetime_frame.shift(5, freq=offsets.BDay())
30+
assert len(shiftedFrame) == len(datetime_frame)
31+
32+
shiftedFrame2 = datetime_frame.shift(5, freq="B")
33+
tm.assert_frame_equal(shiftedFrame, shiftedFrame2)
34+
35+
d = datetime_frame.index[0]
36+
shifted_d = d + offsets.BDay(5)
37+
tm.assert_series_equal(
38+
datetime_frame.xs(d), shiftedFrame.xs(shifted_d), check_names=False
39+
)
40+
41+
# shift int frame
42+
int_shifted = int_frame.shift(1) # noqa
43+
44+
# Shifting with PeriodIndex
45+
ps = tm.makePeriodFrame()
46+
shifted = ps.shift(1)
47+
unshifted = shifted.shift(-1)
48+
tm.assert_index_equal(shifted.index, ps.index)
49+
tm.assert_index_equal(unshifted.index, ps.index)
50+
tm.assert_numpy_array_equal(
51+
unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values
52+
)
53+
54+
shifted2 = ps.shift(1, "B")
55+
shifted3 = ps.shift(1, offsets.BDay())
56+
tm.assert_frame_equal(shifted2, shifted3)
57+
tm.assert_frame_equal(ps, shifted2.shift(-1, "B"))
58+
59+
msg = "does not match PeriodIndex freq"
60+
with pytest.raises(ValueError, match=msg):
61+
ps.shift(freq="D")
62+
63+
# shift other axis
64+
# GH#6371
65+
df = DataFrame(np.random.rand(10, 5))
66+
expected = pd.concat(
67+
[DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]],
68+
ignore_index=True,
69+
axis=1,
70+
)
71+
result = df.shift(1, axis=1)
72+
tm.assert_frame_equal(result, expected)
73+
74+
# shift named axis
75+
df = DataFrame(np.random.rand(10, 5))
76+
expected = pd.concat(
77+
[DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]],
78+
ignore_index=True,
79+
axis=1,
80+
)
81+
result = df.shift(1, axis="columns")
82+
tm.assert_frame_equal(result, expected)
83+
84+
def test_shift_bool(self):
85+
df = DataFrame({"high": [True, False], "low": [False, False]})
86+
rs = df.shift(1)
87+
xp = DataFrame(
88+
np.array([[np.nan, np.nan], [True, False]], dtype=object),
89+
columns=["high", "low"],
90+
)
91+
tm.assert_frame_equal(rs, xp)
92+
93+
def test_shift_categorical(self):
94+
# GH#9416
95+
s1 = pd.Series(["a", "b", "c"], dtype="category")
96+
s2 = pd.Series(["A", "B", "C"], dtype="category")
97+
df = DataFrame({"one": s1, "two": s2})
98+
rs = df.shift(1)
99+
xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)})
100+
tm.assert_frame_equal(rs, xp)
101+
102+
def test_shift_fill_value(self):
103+
# GH#24128
104+
df = DataFrame(
105+
[1, 2, 3, 4, 5], index=date_range("1/1/2000", periods=5, freq="H")
106+
)
107+
exp = DataFrame(
108+
[0, 1, 2, 3, 4], index=date_range("1/1/2000", periods=5, freq="H")
109+
)
110+
result = df.shift(1, fill_value=0)
111+
tm.assert_frame_equal(result, exp)
112+
113+
exp = DataFrame(
114+
[0, 0, 1, 2, 3], index=date_range("1/1/2000", periods=5, freq="H")
115+
)
116+
result = df.shift(2, fill_value=0)
117+
tm.assert_frame_equal(result, exp)
118+
119+
def test_shift_empty(self):
120+
# Regression test for GH#8019
121+
df = DataFrame({"foo": []})
122+
rs = df.shift(-1)
123+
124+
tm.assert_frame_equal(df, rs)
125+
126+
def test_shift_duplicate_columns(self):
127+
# GH#9092; verify that position-based shifting works
128+
# in the presence of duplicate columns
129+
column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]]
130+
data = np.random.randn(20, 5)
131+
132+
shifted = []
133+
for columns in column_lists:
134+
df = pd.DataFrame(data.copy(), columns=columns)
135+
for s in range(5):
136+
df.iloc[:, s] = df.iloc[:, s].shift(s + 1)
137+
df.columns = range(5)
138+
shifted.append(df)
139+
140+
# sanity check the base case
141+
nulls = shifted[0].isna().sum()
142+
tm.assert_series_equal(nulls, Series(range(1, 6), dtype="int64"))
143+
144+
# check all answers are the same
145+
tm.assert_frame_equal(shifted[0], shifted[1])
146+
tm.assert_frame_equal(shifted[0], shifted[2])
147+
148+
def test_tshift(self, datetime_frame):
149+
# PeriodIndex
150+
ps = tm.makePeriodFrame()
151+
shifted = ps.tshift(1)
152+
unshifted = shifted.tshift(-1)
153+
154+
tm.assert_frame_equal(unshifted, ps)
155+
156+
shifted2 = ps.tshift(freq="B")
157+
tm.assert_frame_equal(shifted, shifted2)
158+
159+
shifted3 = ps.tshift(freq=offsets.BDay())
160+
tm.assert_frame_equal(shifted, shifted3)
161+
162+
with pytest.raises(ValueError, match="does not match"):
163+
ps.tshift(freq="M")
164+
165+
# DatetimeIndex
166+
shifted = datetime_frame.tshift(1)
167+
unshifted = shifted.tshift(-1)
168+
169+
tm.assert_frame_equal(datetime_frame, unshifted)
170+
171+
shifted2 = datetime_frame.tshift(freq=datetime_frame.index.freq)
172+
tm.assert_frame_equal(shifted, shifted2)
173+
174+
inferred_ts = DataFrame(
175+
datetime_frame.values,
176+
Index(np.asarray(datetime_frame.index)),
177+
columns=datetime_frame.columns,
178+
)
179+
shifted = inferred_ts.tshift(1)
180+
unshifted = shifted.tshift(-1)
181+
tm.assert_frame_equal(shifted, datetime_frame.tshift(1))
182+
tm.assert_frame_equal(unshifted, inferred_ts)
183+
184+
no_freq = datetime_frame.iloc[[0, 5, 7], :]
185+
msg = "Freq was not given and was not set in the index"
186+
with pytest.raises(ValueError, match=msg):
187+
no_freq.tshift()

0 commit comments

Comments
 (0)