 import pandas as pd
 from pandas.core.frame import DataFrame, Series
 from pandas.io.parsers import read_csv
-from pandas.io.stata import read_stata, StataReader, InvalidColumnName
+from pandas.io.stata import (read_stata, StataReader, InvalidColumnName,
+                             PossiblePrecisionLoss)
 import pandas.util.testing as tm
 from pandas.util.misc import is_little_endian
 from pandas import compat
@@ -142,8 +143,7 @@ def test_read_dta2(self):
             parsed_117 = self.read_dta(self.dta2_117)
             # 113 is buggy due to limited date format support in Stata
             # parsed_113 = self.read_dta(self.dta2_113)
-
-            np.testing.assert_equal(
+            tm.assert_equal(
                 len(w), 1)  # should get a warning for that format.

         # buggy test because of the NaT comparison on certain platforms
@@ -206,7 +206,7 @@ def test_read_write_dta5(self):
         original.index.name = 'index'

         with tm.ensure_clean() as path:
-            original.to_stata(path, None, False)
+            original.to_stata(path, None)
             written_and_read_again = self.read_dta(path)
             tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                   original)
@@ -221,7 +221,7 @@ def test_write_dta6(self):
         original['quarter'] = original['quarter'].astype(np.int32)

         with tm.ensure_clean() as path:
-            original.to_stata(path, None, False)
+            original.to_stata(path, None)
             written_and_read_again = self.read_dta(path)
             tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                   original)
@@ -257,7 +257,7 @@ def test_read_write_dta10(self):
         original['integer'] = original['integer'].astype(np.int32)

         with tm.ensure_clean() as path:
-            original.to_stata(path, {'datetime': 'tc'}, False)
+            original.to_stata(path, {'datetime': 'tc'})
             written_and_read_again = self.read_dta(path)
             tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                   original)
@@ -295,9 +295,9 @@ def test_read_write_dta11(self):

         with tm.ensure_clean() as path:
             with warnings.catch_warnings(record=True) as w:
-                original.to_stata(path, None, False)
-                np.testing.assert_equal(
-                    len(w), 1)  # should get a warning for that format.
+                original.to_stata(path, None)
+                # should get a warning for that format.
+                tm.assert_equal(len(w), 1)

             written_and_read_again = self.read_dta(path)
             tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted)
@@ -324,13 +324,12 @@ def test_read_write_dta12(self):

         with tm.ensure_clean() as path:
             with warnings.catch_warnings(record=True) as w:
-                original.to_stata(path, None, False)
-                np.testing.assert_equal(
-                    len(w), 1)  # should get a warning for that format.
+                original.to_stata(path, None)
+                tm.assert_equal(len(w), 1)  # should get a warning for that format.

             written_and_read_again = self.read_dta(path)
             tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted)
-
+
     def test_read_write_dta13(self):
         s1 = Series(2 ** 9, dtype=np.int16)
         s2 = Series(2 ** 17, dtype=np.int32)
@@ -366,7 +365,7 @@ def test_read_write_reread_dta14(self):
             tm.assert_frame_equal(parsed_114, parsed_115)

         with tm.ensure_clean() as path:
-            parsed_114.to_stata(path, {'date_td': 'td'}, write_index=False)
+            parsed_114.to_stata(path, {'date_td': 'td'})
             written_and_read_again = self.read_dta(path)
             tm.assert_frame_equal(written_and_read_again.set_index('index'), parsed_114)

@@ -406,7 +405,7 @@ def test_numeric_column_names(self):
             with warnings.catch_warnings(record=True) as w:
                 tm.assert_produces_warning(original.to_stata(path), InvalidColumnName)
                 # should produce a single warning
-                np.testing.assert_equal(len(w), 1)
+                tm.assert_equal(len(w), 1)

             written_and_read_again = self.read_dta(path)
             written_and_read_again = written_and_read_again.set_index('index')
@@ -415,7 +414,102 @@ def test_numeric_column_names(self):
             written_and_read_again.columns = map(convert_col_name, columns)
             tm.assert_frame_equal(original, written_and_read_again)

+    def test_nan_to_missing_value(self):
+        s1 = Series(np.arange(4.0), dtype=np.float32)
+        s2 = Series(np.arange(4.0), dtype=np.float64)
+        s1[::2] = np.nan
+        s2[1::2] = np.nan
+        original = DataFrame({'s1': s1, 's2': s2})
+        original.index.name = 'index'
+        with tm.ensure_clean() as path:
+            original.to_stata(path)
+            written_and_read_again = self.read_dta(path)
+            written_and_read_again = written_and_read_again.set_index('index')
+            tm.assert_frame_equal(written_and_read_again, original)
+
+    def test_no_index(self):
+        columns = ['x', 'y']
+        original = DataFrame(np.reshape(np.arange(10.0), (5, 2)),
+                             columns=columns)
+        original.index.name = 'index_not_written'
+        with tm.ensure_clean() as path:
+            original.to_stata(path, write_index=False)
+            written_and_read_again = self.read_dta(path)
+            tm.assertRaises(KeyError,
+                            lambda: written_and_read_again['index_not_written'])
+
+    def test_string_no_dates(self):
+        s1 = Series(['a', 'A longer string'])
+        s2 = Series([1.0, 2.0], dtype=np.float64)
+        original = DataFrame({'s1': s1, 's2': s2})
+        original.index.name = 'index'
+        with tm.ensure_clean() as path:
+            original.to_stata(path)
+            written_and_read_again = self.read_dta(path)
+            tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                  original)
+
+    def test_large_value_conversion(self):
+        s0 = Series([1, 99], dtype=np.int8)
+        s1 = Series([1, 127], dtype=np.int8)
+        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
+        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
+        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
+        original.index.name = 'index'
+        with tm.ensure_clean() as path:
+            with warnings.catch_warnings(record=True) as w:
+                tm.assert_produces_warning(original.to_stata(path),
+                                           PossiblePrecisionLoss)
+                # should produce a single warning
+                tm.assert_equal(len(w), 1)
+
+            written_and_read_again = self.read_dta(path)
+            modified = original.copy()
+            modified['s1'] = Series(modified['s1'], dtype=np.int16)
+            modified['s2'] = Series(modified['s2'], dtype=np.int32)
+            modified['s3'] = Series(modified['s3'], dtype=np.float64)
+            tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                  modified)
+
+    def test_dates_invalid_column(self):
+        original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)])
+        original.index.name = 'index'
+        with tm.ensure_clean() as path:
+            with warnings.catch_warnings(record=True) as w:
+                tm.assert_produces_warning(original.to_stata(path, {0: 'tc'}),
+                                           InvalidColumnName)
+                tm.assert_equal(len(w), 1)
+
+            written_and_read_again = self.read_dta(path)
+            modified = original.copy()
+            modified.columns = ['_0']
+            tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                  modified)
+
+    def test_date_export_formats(self):
+        columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']
+        conversions = dict(((c, c) for c in columns))
+        data = [datetime(2006, 11, 20, 23, 13, 20)] * len(columns)
+        original = DataFrame([data], columns=columns)
+        original.index.name = 'index'
+        expected_values = [datetime(2006, 11, 20, 23, 13, 20),  # Time
+                           datetime(2006, 11, 20),  # Day
+                           datetime(2006, 11, 19),  # Week
+                           datetime(2006, 11, 1),  # Month
+                           datetime(2006, 10, 1),  # Quarter year
+                           datetime(2006, 7, 1),  # Half year
+                           datetime(2006, 1, 1)]  # Year
+
+        expected = DataFrame([expected_values], columns=columns)
+        expected.index.name = 'index'
+        with tm.ensure_clean() as path:
+            original.to_stata(path, conversions)
+            written_and_read_again = self.read_dta(path)
+            tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                  expected)
+

 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
+
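For reference, a minimal usage sketch of the writer behavior the new tests exercise (assuming a pandas build with the changes above; the file name 'example.dta' is hypothetical):

    import numpy as np
    from pandas import DataFrame, Series
    from pandas.io.stata import read_stata

    df = DataFrame({'s3': Series([1, 2 ** 63 - 1], dtype=np.int64)})
    df.index.name = 'index'

    # Per test_large_value_conversion, int64 values beyond Stata's integer
    # range are upcast to float64 on write, with a PossiblePrecisionLoss warning.
    df.to_stata('example.dta')

    # By default the index round-trips as an 'index' column; pass
    # write_index=False to drop it, as in test_no_index.
    round_tripped = read_stata('example.dta').set_index('index')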