10
10
11
11
from pandas .core .frame import DataFrame , Series
12
12
from pandas .io .parsers import read_csv
13
- from pandas .io .stata import read_stata , StataReader , StataWriter
13
+ from pandas .io .stata import read_stata , StataReader
14
14
import pandas .util .testing as tm
15
- from pandas .util .testing import ensure_clean
16
15
from pandas .util .misc import is_little_endian
17
16
18
17
@@ -27,15 +26,12 @@ def setUp(self):
27
26
self .dta3 = os .path .join (self .dirpath , 'stata3.dta' )
28
27
self .csv3 = os .path .join (self .dirpath , 'stata3.csv' )
29
28
self .dta4 = os .path .join (self .dirpath , 'stata4.dta' )
30
- self .dta5 = os .path .join (self .dirpath , 'stata5.dta' )
31
- self .dta6 = os .path .join (self .dirpath , 'stata6.dta' )
32
29
self .dta7 = os .path .join (self .dirpath , 'cancer.dta' )
33
30
self .csv7 = os .path .join (self .dirpath , 'cancer.csv' )
34
31
self .dta8 = os .path .join (self .dirpath , 'tbl19-3.dta' )
35
32
self .csv8 = os .path .join (self .dirpath , 'tbl19-3.csv' )
36
33
self .dta9 = os .path .join (self .dirpath , 'lbw.dta' )
37
34
self .csv9 = os .path .join (self .dirpath , 'lbw.csv' )
38
- self .dta10 = os .path .join (self .dirpath , 'stata10.dta' )
39
35
40
36
def read_dta(self, file):
    """Load the Stata file ``file`` through ``read_stata`` with
    ``convert_dates=True`` and return the result."""
    parsed_frame = read_stata(file, convert_dates=True)
    return parsed_frame
@@ -46,9 +42,11 @@ def read_csv(self, file):
46
42
def test_read_dta1 (self ):
47
43
reader = StataReader (self .dta1 )
48
44
parsed = reader .data ()
49
- # Pandas uses np.nan as missing value. Thus, all columns will be of type float, regardless of their name.
45
+ # Pandas uses np.nan as missing value.
46
+ # Thus, all columns will be of type float, regardless of their name.
50
47
expected = DataFrame ([(np .nan , np .nan , np .nan , np .nan , np .nan )],
51
- columns = ['float_miss' , 'double_miss' , 'byte_miss' , 'int_miss' , 'long_miss' ])
48
+ columns = ['float_miss' , 'double_miss' , 'byte_miss' ,
49
+ 'int_miss' , 'long_miss' ])
52
50
53
51
for i , col in enumerate (parsed .columns ):
54
52
np .testing .assert_almost_equal (
@@ -90,7 +88,9 @@ def test_read_dta2(self):
90
88
np .datetime64 ('NaT' )
91
89
)
92
90
],
93
- columns = ['datetime_c' , 'datetime_big_c' , 'date' , 'weekly_date' , 'monthly_date' , 'quarterly_date' , 'half_yearly_date' , 'yearly_date' ]
91
+ columns = ['datetime_c' , 'datetime_big_c' , 'date' , 'weekly_date' ,
92
+ 'monthly_date' , 'quarterly_date' , 'half_yearly_date' ,
93
+ 'yearly_date' ]
94
94
)
95
95
96
96
with warnings .catch_warnings (record = True ) as w :
@@ -125,34 +125,40 @@ def test_read_dta4(self):
125
125
["nine" , "two" , 9 , np .nan , "nine" ],
126
126
["ten" , "one" , "ten" , np .nan , "ten" ]
127
127
],
128
- columns = ['fully_labeled' , 'fully_labeled2' , 'incompletely_labeled' , 'labeled_with_missings' , 'float_labelled' ])
128
+ columns = ['fully_labeled' , 'fully_labeled2' , 'incompletely_labeled' ,
129
+ 'labeled_with_missings' , 'float_labelled' ])
129
130
130
131
tm .assert_frame_equal (parsed , expected )
131
132
132
def test_read_write_dta5(self):
    """Round-trip a single-row, all-NaN frame through a Stata file.

    The frame is written with ``to_stata``, read back with
    ``self.read_dta`` and, after the ``'index'`` column is restored as the
    index, compared with the original.

    Raises ``nose.SkipTest`` when ``is_little_endian()`` is false.
    """
    if not is_little_endian():
        # The message names this test so that skip reports point at the
        # function that was actually skipped.
        raise nose.SkipTest("known failure of test_read_write_dta5 on "
                            "non-little endian")

    original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
                         columns=['float_miss', 'double_miss', 'byte_miss',
                                  'int_miss', 'long_miss'])
    original.index.name = 'index'

    # NOTE(review): ensure_clean() presumably yields a temporary path that
    # is cleaned up on exit -- confirm against pandas.util.testing.
    with tm.ensure_clean() as path:
        original.to_stata(path, None, False)
        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(written_and_read_again.set_index('index'),
                              original)
144
148
145
149
def test_write_dta6(self):
    """Write the frame parsed from ``self.csv3`` to a Stata file and check
    that reading it back gives an equal frame.

    Raises ``nose.SkipTest`` when ``is_little_endian()`` is false.
    """
    little_endian = is_little_endian()
    if not little_endian:
        raise nose.SkipTest(
            "known failure of test_write_dta6 on non-little endian")

    csv_frame = self.read_csv(self.csv3)
    csv_frame.index.name = 'index'

    with tm.ensure_clean() as tmp_path:
        csv_frame.to_stata(tmp_path, None, False)
        reread = self.read_dta(tmp_path)
        # The index comes back as an ordinary column named 'index'.
        reindexed = reread.set_index('index')
        tm.assert_frame_equal(reindexed, csv_frame)
156
162
157
163
@nose .tools .nottest
158
164
def test_read_dta7 (self ):
@@ -190,29 +196,30 @@ def test_read_dta9(self):
190
196
decimal = 3
191
197
)
192
198
193
def test_read_write_dta10(self):
    """Round-trip a mixed-type, single-row frame through a Stata file.

    The row holds a string, an object-dtype string, an integer, a float
    and a ``datetime64`` value; the ``'datetime'`` column is written with
    the ``'tc'`` date conversion. The file is read back with
    ``self.read_dta`` and compared with the original.

    Raises ``nose.SkipTest`` when ``is_little_endian()`` is false.
    """
    if not is_little_endian():
        # The message names this test so that skip reports point at the
        # function that was actually skipped.
        raise nose.SkipTest("known failure of test_read_write_dta10 on "
                            "non-little endian")

    original = DataFrame(data=[["string", "object", 1, 1.1,
                                np.datetime64('2003-12-25')]],
                         columns=['string', 'object', 'integer', 'float',
                                  'datetime'])
    # Force the 'object' column to object dtype explicitly.
    original["object"] = Series(original["object"], dtype=object)
    original.index.name = 'index'

    # NOTE(review): ensure_clean() presumably yields a temporary path that
    # is cleaned up on exit -- confirm against pandas.util.testing.
    with tm.ensure_clean() as path:
        original.to_stata(path, {'datetime': 'tc'}, False)
        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(written_and_read_again.set_index('index'),
                              original)
210
216
211
217
def test_stata_doc_examples(self):
    """Smoke test: write a 10x2 random frame with columns A and B to a
    Stata file; passes if no exception is raised."""
    with tm.ensure_clean() as tmp_path:
        random_values = np.random.randn(10, 2)
        frame = DataFrame(random_values, columns=['A', 'B'])
        frame.to_stata(tmp_path)
215
221
222
+
216
223
# When executed as a script, run this module's tests under nose with the
# '-vvs', '-x', '--pdb' and '--pdb-failure' options.
if __name__ == '__main__':
    nose.runmodule(
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
        exit=False,
    )
0 commit comments