@@ -90,7 +90,12 @@ def test_read_empty_dta_with_dtypes(self, version):
90
90
"f64" : np .array ([0 ], dtype = np .float64 ),
91
91
}
92
92
)
93
- expected = empty_df_typed .copy ()
93
+ # GH 7369: make sure we can read a 0-obs (empty) dta file
94
+ with tm .ensure_clean () as path :
95
+ empty_df_typed .to_stata (path , write_index = False , version = version )
96
+ empty_reread = read_stata (path )
97
+
98
+ expected = empty_df_typed
94
99
# No uint# support. Downcast since values in range for int#
95
100
expected ["u8" ] = expected ["u8" ].astype (np .int8 )
96
101
expected ["u16" ] = expected ["u16" ].astype (np .int16 )
@@ -99,12 +104,8 @@ def test_read_empty_dta_with_dtypes(self, version):
99
104
expected ["u64" ] = expected ["u64" ].astype (np .int32 )
100
105
expected ["i64" ] = expected ["i64" ].astype (np .int32 )
101
106
102
- # GH 7369, make sure can read a 0-obs dta file
103
- with tm .ensure_clean () as path :
104
- empty_df_typed .to_stata (path , write_index = False , version = version )
105
- empty_reread = read_stata (path )
106
- tm .assert_frame_equal (expected , empty_reread )
107
- tm .assert_series_equal (expected .dtypes , empty_reread .dtypes )
107
+ tm .assert_frame_equal (expected , empty_reread )
108
+ tm .assert_series_equal (expected .dtypes , empty_reread .dtypes )
108
109
109
110
@pytest .mark .parametrize ("version" , [114 , 117 , 118 , 119 , None ])
110
111
def test_read_index_col_none (self , version ):
@@ -115,7 +116,7 @@ def test_read_index_col_none(self, version):
115
116
read_df = read_stata (path )
116
117
117
118
assert isinstance (read_df .index , pd .RangeIndex )
118
- expected = df . copy ()
119
+ expected = df
119
120
expected ["a" ] = expected ["a" ].astype (np .int32 )
120
121
tm .assert_frame_equal (read_df , expected , check_index_type = True )
121
122
@@ -325,7 +326,7 @@ def test_read_write_dta5(self):
325
326
original .to_stata (path , convert_dates = None )
326
327
written_and_read_again = self .read_dta (path )
327
328
328
- expected = original . copy ()
329
+ expected = original
329
330
expected .index = expected .index .astype (np .int32 )
330
331
tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
331
332
@@ -424,7 +425,7 @@ def test_read_write_dta11(self):
424
425
425
426
written_and_read_again = self .read_dta (path )
426
427
427
- expected = formatted . copy ()
428
+ expected = formatted
428
429
expected .index = expected .index .astype (np .int32 )
429
430
tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
430
431
@@ -462,7 +463,7 @@ def test_read_write_dta12(self, version):
462
463
463
464
written_and_read_again = self .read_dta (path )
464
465
465
- expected = formatted . copy ()
466
+ expected = formatted
466
467
expected .index = expected .index .astype (np .int32 )
467
468
tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
468
469
@@ -480,7 +481,7 @@ def test_read_write_dta13(self):
480
481
original .to_stata (path )
481
482
written_and_read_again = self .read_dta (path )
482
483
483
- expected = formatted . copy ()
484
+ expected = formatted
484
485
expected .index = expected .index .astype (np .int32 )
485
486
tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
486
487
@@ -561,7 +562,7 @@ def test_numeric_column_names(self):
561
562
convert_col_name = lambda x : int (x [1 ])
562
563
written_and_read_again .columns = map (convert_col_name , columns )
563
564
564
- expected = original . copy ()
565
+ expected = original
565
566
expected .index = expected .index .astype (np .int32 )
566
567
tm .assert_frame_equal (expected , written_and_read_again )
567
568
@@ -579,7 +580,7 @@ def test_nan_to_missing_value(self, version):
579
580
written_and_read_again = self .read_dta (path )
580
581
581
582
written_and_read_again = written_and_read_again .set_index ("index" )
582
- expected = original . copy ()
583
+ expected = original
583
584
expected .index = expected .index .astype (np .int32 )
584
585
tm .assert_frame_equal (written_and_read_again , expected )
585
586
@@ -602,7 +603,7 @@ def test_string_no_dates(self):
602
603
original .to_stata (path )
603
604
written_and_read_again = self .read_dta (path )
604
605
605
- expected = original . copy ()
606
+ expected = original
606
607
expected .index = expected .index .astype (np .int32 )
607
608
tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
608
609
@@ -619,7 +620,7 @@ def test_large_value_conversion(self):
619
620
620
621
written_and_read_again = self .read_dta (path )
621
622
622
- modified = original . copy ()
623
+ modified = original
623
624
modified ["s1" ] = Series (modified ["s1" ], dtype = np .int16 )
624
625
modified ["s2" ] = Series (modified ["s2" ], dtype = np .int32 )
625
626
modified ["s3" ] = Series (modified ["s3" ], dtype = np .float64 )
@@ -635,7 +636,7 @@ def test_dates_invalid_column(self):
635
636
636
637
written_and_read_again = self .read_dta (path )
637
638
638
- modified = original . copy ()
639
+ modified = original
639
640
modified .columns = ["_0" ]
640
641
modified .index = original .index .astype (np .int32 )
641
642
tm .assert_frame_equal (written_and_read_again .set_index ("index" ), modified )
@@ -721,8 +722,15 @@ def test_bool_uint(self, byteorder, version):
721
722
{"s0" : s0 , "s1" : s1 , "s2" : s2 , "s3" : s3 , "s4" : s4 , "s5" : s5 , "s6" : s6 }
722
723
)
723
724
original .index .name = "index"
724
- expected = original .copy ()
725
- expected .index = original .index .astype (np .int32 )
725
+
726
+ with tm .ensure_clean () as path :
727
+ original .to_stata (path , byteorder = byteorder , version = version )
728
+ written_and_read_again = self .read_dta (path )
729
+
730
+ written_and_read_again = written_and_read_again .set_index ("index" )
731
+
732
+ expected = original
733
+ expected .index = expected .index .astype (np .int32 )
726
734
expected_types = (
727
735
np .int8 ,
728
736
np .int8 ,
@@ -735,11 +743,6 @@ def test_bool_uint(self, byteorder, version):
735
743
for c , t in zip (expected .columns , expected_types ):
736
744
expected [c ] = expected [c ].astype (t )
737
745
738
- with tm .ensure_clean () as path :
739
- original .to_stata (path , byteorder = byteorder , version = version )
740
- written_and_read_again = self .read_dta (path )
741
-
742
- written_and_read_again = written_and_read_again .set_index ("index" )
743
746
tm .assert_frame_equal (written_and_read_again , expected )
744
747
745
748
def test_variable_labels (self , datapath ):
@@ -1000,18 +1003,19 @@ def test_categorical_writing(self, version):
1000
1003
"unlabeled" ,
1001
1004
],
1002
1005
)
1003
- expected = original .copy ()
1006
+ with tm .ensure_clean () as path :
1007
+ original .astype ("category" ).to_stata (path , version = version )
1008
+ written_and_read_again = self .read_dta (path )
1004
1009
1005
- # these are all categoricals
1006
- original = pd .concat (
1007
- [original [col ].astype ("category" ) for col in original ], axis = 1
1008
- )
1010
+ res = written_and_read_again .set_index ("index" )
1011
+
1012
+ expected = original
1009
1013
expected .index = expected .index .set_names ("index" ).astype (np .int32 )
1010
1014
1011
1015
expected ["incompletely_labeled" ] = expected ["incompletely_labeled" ].apply (str )
1012
1016
expected ["unlabeled" ] = expected ["unlabeled" ].apply (str )
1013
1017
for col in expected :
1014
- orig = expected [col ]. copy ()
1018
+ orig = expected [col ]
1015
1019
1016
1020
cat = orig .astype ("category" )._values
1017
1021
cat = cat .as_ordered ()
@@ -1022,11 +1026,6 @@ def test_categorical_writing(self, version):
1022
1026
1023
1027
expected [col ] = cat
1024
1028
1025
- with tm .ensure_clean () as path :
1026
- original .to_stata (path , version = version )
1027
- written_and_read_again = self .read_dta (path )
1028
-
1029
- res = written_and_read_again .set_index ("index" )
1030
1029
tm .assert_frame_equal (res , expected )
1031
1030
1032
1031
def test_categorical_warnings_and_errors (self ):
@@ -1037,9 +1036,7 @@ def test_categorical_warnings_and_errors(self):
1037
1036
columns = ["Too_long" ],
1038
1037
)
1039
1038
1040
- original = pd .concat (
1041
- [original [col ].astype ("category" ) for col in original ], axis = 1
1042
- )
1039
+ original = original .astype ("category" )
1043
1040
with tm .ensure_clean () as path :
1044
1041
msg = (
1045
1042
"Stata value labels for a single variable must have "
@@ -1050,10 +1047,7 @@ def test_categorical_warnings_and_errors(self):
1050
1047
1051
1048
original = DataFrame .from_records (
1052
1049
[["a" ], ["b" ], ["c" ], ["d" ], [1 ]], columns = ["Too_long" ]
1053
- )
1054
- original = pd .concat (
1055
- [original [col ].astype ("category" ) for col in original ], axis = 1
1056
- )
1050
+ ).astype ("category" )
1057
1051
1058
1052
with tm .assert_produces_warning (ValueLabelTypeMismatch ):
1059
1053
original .to_stata (path )
@@ -1074,7 +1068,7 @@ def test_categorical_with_stata_missing_values(self, version):
1074
1068
1075
1069
res = written_and_read_again .set_index ("index" )
1076
1070
1077
- expected = original . copy ()
1071
+ expected = original
1078
1072
for col in expected :
1079
1073
cat = expected [col ]._values
1080
1074
new_cats = cat .remove_unused_categories ().categories
@@ -1525,7 +1519,7 @@ def test_out_of_range_float(self):
1525
1519
reread = read_stata (path )
1526
1520
1527
1521
original ["ColumnTooBig" ] = original ["ColumnTooBig" ].astype (np .float64 )
1528
- expected = original . copy ()
1522
+ expected = original
1529
1523
expected .index = expected .index .astype (np .int32 )
1530
1524
tm .assert_frame_equal (reread .set_index ("index" ), expected )
1531
1525
@@ -1672,13 +1666,13 @@ def test_writer_117(self):
1672
1666
version = 117 ,
1673
1667
)
1674
1668
written_and_read_again = self .read_dta (path )
1675
- # original.index is np.int32, read index is np.int64
1676
- tm .assert_frame_equal (
1677
- written_and_read_again .set_index ("index" ),
1678
- original ,
1679
- check_index_type = False ,
1680
- )
1681
- tm .assert_frame_equal (original , copy )
1669
+ # original.index is np.int32, read index is np.int64
1670
+ tm .assert_frame_equal (
1671
+ written_and_read_again .set_index ("index" ),
1672
+ original ,
1673
+ check_index_type = False ,
1674
+ )
1675
+ tm .assert_frame_equal (original , copy )
1682
1676
1683
1677
def test_convert_strl_name_swap (self ):
1684
1678
original = DataFrame (
@@ -2052,7 +2046,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten
2052
2046
fp = path
2053
2047
reread = read_stata (fp , index_col = "index" )
2054
2048
2055
- expected = df . copy ()
2049
+ expected = df
2056
2050
expected .index = expected .index .astype (np .int32 )
2057
2051
tm .assert_frame_equal (reread , expected )
2058
2052
@@ -2078,7 +2072,7 @@ def test_compression_dict(method, file_ext):
2078
2072
fp = path
2079
2073
reread = read_stata (fp , index_col = "index" )
2080
2074
2081
- expected = df . copy ()
2075
+ expected = df
2082
2076
expected .index = expected .index .astype (np .int32 )
2083
2077
tm .assert_frame_equal (reread , expected )
2084
2078
0 commit comments