@@ -1118,6 +1118,8 @@ def __init__(self) -> None:
1118
1118
class StataReader (StataParser , abc .Iterator ):
1119
1119
__doc__ = _stata_reader_doc
1120
1120
1121
+ _path_or_buf : IO [bytes ] # Will be assigned by `_open_file`.
1122
+
1121
1123
def __init__ (
1122
1124
self ,
1123
1125
path_or_buf : FilePath | ReadBuffer [bytes ],
@@ -1144,6 +1146,9 @@ def __init__(
1144
1146
self ._preserve_dtypes = preserve_dtypes
1145
1147
self ._columns = columns
1146
1148
self ._order_categoricals = order_categoricals
1149
+ self ._original_path_or_buf = path_or_buf
1150
+ self ._compression = compression
1151
+ self ._storage_options = storage_options
1147
1152
self ._encoding = ""
1148
1153
self ._chunksize = chunksize
1149
1154
self ._using_iterator = False
@@ -1153,6 +1158,7 @@ def __init__(
1153
1158
raise ValueError ("chunksize must be a positive integer when set." )
1154
1159
1155
1160
# State variables for the file
1161
+ self ._close_file : Callable [[], None ] | None = None
1156
1162
self ._has_string_data = False
1157
1163
self ._missing_values = False
1158
1164
self ._can_read_value_labels = False
@@ -1163,12 +1169,24 @@ def __init__(
1163
1169
self ._lines_read = 0
1164
1170
1165
1171
self ._native_byteorder = _set_endianness (sys .byteorder )
1172
+
1173
+ def _ensure_open (self ) -> None :
1174
+ """
1175
+ Ensure the file has been opened and its header data read.
1176
+ """
1177
+ if not hasattr (self , "_path_or_buf" ):
1178
+ self ._open_file ()
1179
+
1180
+ def _open_file (self ) -> None :
1181
+ """
1182
+ Open the file (with compression options, etc.), and read header information.
1183
+ """
1166
1184
with get_handle (
1167
- path_or_buf ,
1185
+ self . _original_path_or_buf ,
1168
1186
"rb" ,
1169
- storage_options = storage_options ,
1187
+ storage_options = self . _storage_options ,
1170
1188
is_text = False ,
1171
- compression = compression ,
1189
+ compression = self . _compression ,
1172
1190
) as handles :
1173
1191
# Copy to BytesIO, and ensure no encoding
1174
1192
self ._path_or_buf = BytesIO (handles .handle .read ())
@@ -1535,6 +1553,7 @@ def _decode(self, s: bytes) -> str:
1535
1553
return s .decode ("latin-1" )
1536
1554
1537
1555
def _read_value_labels (self ) -> None :
1556
+ self ._ensure_open ()
1538
1557
if self ._value_labels_read :
1539
1558
# Don't read twice
1540
1559
return
@@ -1654,6 +1673,7 @@ def read(
1654
1673
columns : Sequence [str ] | None = None ,
1655
1674
order_categoricals : bool | None = None ,
1656
1675
) -> DataFrame :
1676
+ self ._ensure_open ()
1657
1677
# Handle empty file or chunk. If reading incrementally raise
1658
1678
# StopIteration. If reading the whole thing return an empty
1659
1679
# data frame.
@@ -1982,48 +2002,15 @@ def data_label(self) -> str:
1982
2002
"""
1983
2003
Return data label of Stata file.
1984
2004
"""
2005
+ self ._ensure_open ()
1985
2006
return self ._data_label
1986
2007
1987
- @property
1988
- def typlist (self ) -> list [int | str ]:
1989
- """
1990
- Return list of variable types.
1991
- """
1992
- return self ._typlist
1993
-
1994
- @property
1995
- def dtyplist (self ) -> list [str | np .dtype ]:
1996
- """
1997
- Return list of variable types.
1998
- """
1999
- return self ._dtyplist
2000
-
2001
- @property
2002
- def lbllist (self ) -> list [str ]:
2003
- """
2004
- Return list of variable labels.
2005
- """
2006
- return self ._lbllist
2007
-
2008
- @property
2009
- def varlist (self ) -> list [str ]:
2010
- """
2011
- Return list of variable names.
2012
- """
2013
- return self ._varlist
2014
-
2015
- @property
2016
- def fmtlist (self ) -> list [str ]:
2017
- """
2018
- Return list of variable formats.
2019
- """
2020
- return self ._fmtlist
2021
-
2022
2008
@property
def time_stamp(self) -> str:
    """
    Return time stamp of Stata file.

    ``_ensure_open`` is called first so that the file has been opened
    and its header data read before the stored value is returned.
    """
    self._ensure_open()
    stamp = self._time_stamp
    return stamp
2028
2015
2029
2016
def variable_labels (self ) -> dict [str , str ]:
@@ -2034,6 +2021,7 @@ def variable_labels(self) -> dict[str, str]:
2034
2021
-------
2035
2022
dict
2036
2023
"""
2024
+ self ._ensure_open ()
2037
2025
return dict (zip (self ._varlist , self ._variable_labels ))
2038
2026
2039
2027
def value_labels (self ) -> dict [str , dict [float , str ]]:
0 commit comments