File tree 2 files changed +26
-0
lines changed
2 files changed +26
-0
lines changed Original file line number Diff line number Diff line change 106
106
iterator : bool, default False
107
107
Return StataReader object."""
108
108
109
+ _reader_notes = """\
110
+ Notes
111
+ -----
112
+ Categorical variables read through an iterator may not have the same
113
+ categories and dtype across chunks. This occurs when a variable stored in a DTA
114
+ file is associated with an incomplete set of value labels that only
115
+ label a strict subset of the values."""
116
+
109
117
_read_stata_doc = f"""
110
118
Read Stata file into DataFrame.
111
119
135
143
io.stata.StataReader : Low-level reader for Stata data files.
136
144
DataFrame.to_stata: Export Stata data files.
137
145
146
+ { _reader_notes }
147
+
138
148
Examples
139
149
--------
140
150
Read a Stata dta file:
176
186
{ _statafile_processing_params1 }
177
187
{ _statafile_processing_params2 }
178
188
{ _chunksize_params }
189
+
190
+ { _reader_notes }
179
191
"""
180
192
181
193
Original file line number Diff line number Diff line change @@ -1969,3 +1969,17 @@ def test_iterator_errors(dirpath):
1969
1969
with pytest .raises (ValueError , match = "chunksize must be set to a positive" ):
1970
1970
with StataReader (dta_file ) as reader :
1971
1971
reader .__next__ ()
1972
+
1973
+
1974
+ def test_iterator_value_labels ():
1975
+ # GH 31544
1976
+ values = ["c_label" , "b_label" ] + ["a_label" ] * 500
1977
+ df = DataFrame ({f"col{k}" : pd .Categorical (values , ordered = True ) for k in range (2 )})
1978
+ with tm .ensure_clean () as path :
1979
+ df .to_stata (path , write_index = False )
1980
+ reader = pd .read_stata (path , chunksize = 100 )
1981
+ expected = pd .Index (["a_label" , "b_label" , "c_label" ], dtype = "object" )
1982
+ for j , chunk in enumerate (reader ):
1983
+ for i in range (2 ):
1984
+ tm .assert_index_equal (chunk .dtypes [i ].categories , expected )
1985
+ tm .assert_frame_equal (chunk , df .iloc [j * 100 : (j + 1 ) * 100 ])
You can’t perform that action at this time.
0 commit comments