@@ -71,6 +71,41 @@ def test_read_empty_dta(self, version):
71
71
empty_ds2 = read_stata (path )
72
72
tm .assert_frame_equal (empty_ds , empty_ds2 )
73
73
74
@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
def test_read_empty_dta_with_dtypes(self, version):
    # GH 46240
    # Round-trip one column of every NumPy integer/float width through
    # to_stata/read_stata and check the dtypes that come back.
    # NOTE(review): despite "empty" in the test name, every column below
    # holds a single 0, so the frame that is written has one row. Confirm
    # whether a 0-row frame (e.g. ``.head(0)``) was intended here.
    empty_df_typed = DataFrame(
        {
            "i8": np.array([0], dtype=np.int8),
            "i16": np.array([0], dtype=np.int16),
            "i32": np.array([0], dtype=np.int32),
            "i64": np.array([0], dtype=np.int64),
            "u8": np.array([0], dtype=np.uint8),
            "u16": np.array([0], dtype=np.uint16),
            "u32": np.array([0], dtype=np.uint32),
            "u64": np.array([0], dtype=np.uint64),
            "f32": np.array([0], dtype=np.float32),
            "f64": np.array([0], dtype=np.float64),
        }
    )
    # Columns not listed in the mapping keep their original dtype; astype
    # returns a new frame, so the frame that gets written is left untouched.
    expected = empty_df_typed.astype(
        {
            # No uint# support. Downcast since values in range for int#
            "u8": np.int8,
            "u16": np.int16,
            "u32": np.int32,
            # No int64 supported at all. Downcast since values in range
            # for int32
            "u64": np.int32,
            "i64": np.int32,
        }
    )

    # Write to a temporary .dta file in the requested format version and
    # read it straight back.
    with tm.ensure_clean() as path:
        empty_df_typed.to_stata(path, write_index=False, version=version)
        empty_reread = read_stata(path)
        tm.assert_frame_equal(expected, empty_reread)
        # Explicit per-column dtype comparison, kept from the original test
        tm.assert_series_equal(expected.dtypes, empty_reread.dtypes)
108
+
74
109
@pytest .mark .parametrize ("version" , [114 , 117 , 118 , 119 , None ])
75
110
def test_read_index_col_none (self , version ):
76
111
df = DataFrame ({"a" : range (5 ), "b" : ["b1" , "b2" , "b3" , "b4" , "b5" ]})
@@ -2274,3 +2309,21 @@ def test_nullable_support(dtype, version):
2274
2309
tm .assert_series_equal (df .a , reread .a )
2275
2310
tm .assert_series_equal (reread .b , expected_b )
2276
2311
tm .assert_series_equal (reread .c , expected_c )
2312
+
2313
+
2314
+ def test_empty_frame ():
2315
+ # GH 46240
2316
+ # create an empty DataFrame with int64 and float64 dtypes
2317
+ df = DataFrame (data = {"a" : range (3 ), "b" : [1.0 , 2.0 , 3.0 ]}).head (0 )
2318
+ with tm .ensure_clean () as path :
2319
+ df .to_stata (path , write_index = False , version = 117 )
2320
+ # Read entire dataframe
2321
+ df2 = read_stata (path )
2322
+ assert "b" in df2
2323
+ # Dtypes don't match since no support for int32
2324
+ dtypes = Series ({"a" : np .dtype ("int32" ), "b" : np .dtype ("float64" )})
2325
+ tm .assert_series_equal (df2 .dtypes , dtypes )
2326
+ # read one column of empty .dta file
2327
+ df3 = read_stata (path , columns = ["a" ])
2328
+ assert "b" not in df3
2329
+ tm .assert_series_equal (df3 .dtypes , dtypes .loc [["a" ]])
0 commit comments