Skip to content

TST: add concrete examples of dataframe fixtures to docstrings #26593

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 1, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,25 @@ def float_frame():
Fixture for DataFrame of floats with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465
qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901
tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433
wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651
M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938
QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053
r78Jwns6dn -0.653707 0.883127 0.682199 0.206159
... ... ... ... ...
IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316
lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999
qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121
yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962
65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987
eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871
xSucinXxuV -1.263557 0.252799 -0.552247 0.400426

[30 rows x 4 columns]
"""
return DataFrame(tm.getSeriesData())

Expand All @@ -21,6 +40,25 @@ def float_frame_with_na():
Fixture for DataFrame of floats with index of unique strings

Columns are ['A', 'B', 'C', 'D']; some entries are missing (NaN).

A B C D
ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997
DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872
neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522
0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018
3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826
soujjZ0A08 NaN NaN NaN NaN
7W6NLGsjB9 NaN NaN NaN NaN
... ... ... ... ...
uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590
n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717
ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189
uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503
3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947
2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083
sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517

[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData())
# set some NAs
Expand All @@ -35,6 +73,25 @@ def bool_frame_with_na():
Fixture for DataFrame of booleans with index of unique strings

Columns are ['A', 'B', 'C', 'D']; some entries are missing (NaN).

A B C D
zBZxY2IDGd False False False False
IhBWBMWllt False True True True
ctjdvZSR6R True False True True
AVTujptmxb False True False True
G9lrImrSWq False False False True
sFFwdIUfz2 NaN NaN NaN NaN
s15ptEJnRb NaN NaN NaN NaN
... ... ... ... ...
UW41KkDyZ4 True True False False
l9l6XkOdqV True False False False
X2MeZfzDYA False True False False
xWkIKU7vfX False True False True
QOhL6VmpGU False False False True
22PwkRJdat False True False False
kfboQ3VeIK True False True False

[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData()) > 0
df = df.astype(object)
Expand All @@ -50,6 +107,25 @@ def int_frame():
Fixture for DataFrame of ints with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
vpBeWjM651 1 0 1 0
5JyxmrP1En -1 0 0 0
qEDaoD49U2 -1 1 0 0
m66TkTfsFe 0 0 0 0
EHPaNzEUFm -1 0 -1 0
fpRJCevQhi 2 0 0 0
OlQvnmfi3Q 0 0 -2 0
... .. .. .. ..
uB1FPlz4uP 0 0 0 1
EcSe6yNzCU 0 0 -1 0
L50VudaiI8 -1 1 -2 0
y3bpw4nwIp 0 -1 0 0
H0RdLLwrCT 1 1 0 0
rY82K0vMwm 0 0 0 0
1OPIUjnkjk 2 0 0 0

[30 rows x 4 columns]
"""
df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
# force these all to int64 to avoid platform testing issues
Expand All @@ -62,6 +138,25 @@ def datetime_frame():
Fixture for DataFrame of floats with DatetimeIndex

Columns are ['A', 'B', 'C', 'D'].

A B C D
2000-01-03 -1.122153 0.468535 0.122226 1.693711
2000-01-04 0.189378 0.486100 0.007864 -1.216052
2000-01-05 0.041401 -0.835752 -0.035279 -0.414357
2000-01-06 0.430050 0.894352 0.090719 0.036939
2000-01-07 -0.620982 -0.668211 -0.706153 1.466335
2000-01-10 -0.752633 0.328434 -0.815325 0.699674
2000-01-11 -2.236969 0.615737 -0.829076 -1.196106
... ... ... ... ...
2000-02-03 1.642618 -0.579288 0.046005 1.385249
2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
2000-02-07 -2.656149 -0.601387 1.410148 0.444150
2000-02-08 -1.201881 -1.289040 0.772992 -1.445300
2000-02-09 1.377373 0.398619 1.008453 -0.928207
2000-02-10 0.473194 -0.636677 0.984058 0.511519
2000-02-11 -0.965556 0.408313 -1.312844 -0.381948

[30 rows x 4 columns]
"""
return DataFrame(tm.getTimeSeriesData())

Expand All @@ -72,6 +167,25 @@ def float_string_frame():
Fixture for DataFrame of floats and strings with index of unique strings

Columns are ['A', 'B', 'C', 'D', 'foo'].

A B C D foo
w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar
PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar
ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar
3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar
khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar
LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar
HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar
... ... ... ... ... ...
9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar
h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar
mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar
oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar
9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar
jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar
lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar

[30 rows x 5 columns]
"""
df = DataFrame(tm.getSeriesData())
df['foo'] = 'bar'
Expand All @@ -84,6 +198,25 @@ def mixed_float_frame():
Fixture for DataFrame of different float types with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993
KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588
VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731
kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607
CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266
0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541
tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710
... ... ... ... ...
7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237
4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612
B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653
hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427
1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827
9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204
xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502

[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData())
df.A = df.A.astype('float32')
Expand All @@ -99,6 +232,25 @@ def mixed_int_frame():
Fixture for DataFrame of different int types with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
mUrCZ67juP 0 1 2 2
rw99ACYaKS 0 1 0 0
7QsEcpaaVU 0 1 1 1
xkrimI2pcE 0 1 0 0
dz01SuzoS8 0 1 255 255
ccQkqOHX75 -1 1 0 0
DN0iXaoDLd 0 1 0 0
... .. .. ... ...
Dfb141wAaQ 1 1 254 254
IPD8eQOVu5 0 1 0 0
CcaKulsCmv 0 1 0 0
rIBa8gu7E5 0 1 0 0
RP6peZmh5o 0 1 1 1
NMb9pipQWQ 0 1 0 0
PqgbJEzjib 0 1 3 3

[30 rows x 4 columns]
"""
df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
df.A = df.A.astype('int32')
Expand All @@ -114,6 +266,11 @@ def timezone_frame():
Fixture for DataFrame of date_range Series with different time zones

Columns are ['A', 'B', 'C']; some entries are missing (NaT).

           A                         B                         C
0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
1 2013-01-02                       NaT                       NaT
2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
"""
df = DataFrame({'A': date_range('20130101', periods=3),
'B': date_range('20130101', periods=3,
Expand All @@ -131,6 +288,11 @@ def simple_frame():
Fixture for simple 3x3 DataFrame

Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].

   one  two  three
a  1.0  2.0    3.0
b  4.0  5.0    6.0
c  7.0  8.0    9.0
"""
arr = np.array([[1., 2., 3.],
[4., 5., 6.],
Expand All @@ -147,6 +309,13 @@ def frame_of_index_cols():

Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.

     A      B  C         D         E  (tuple, as, label)
0  foo    one  a  0.608477 -0.012500           -1.664297
1  foo    two  b -0.633460  0.249614           -0.364411
2  foo  three  c  0.615256  2.154968           -0.834666
3  bar    one  d  0.234246  1.085675            0.718445
4  bar    two  e  0.533841 -0.005702           -3.533912
"""
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
'B': ['one', 'two', 'three', 'one', 'two'],
Expand Down