@@ -1143,6 +1143,9 @@ def __init__(
1143
1143
self ._preserve_dtypes = preserve_dtypes
1144
1144
self ._columns = columns
1145
1145
self ._order_categoricals = order_categoricals
1146
+ self ._original_path_or_buf = path_or_buf
1147
+ self ._compression = compression
1148
+ self ._storage_options = storage_options
1146
1149
self ._encoding = ""
1147
1150
self ._chunksize = chunksize
1148
1151
self ._using_iterator = False
@@ -1152,6 +1155,9 @@ def __init__(
1152
1155
raise ValueError ("chunksize must be a positive integer when set." )
1153
1156
1154
1157
# State variables for the file
1158
+ # NB: _path_or_buf is mistyped on purpose, since the alternative is to placate
1159
+ # mypy by having an assert before every read.
1160
+ self ._path_or_buf : IO [bytes ] = None # type: ignore[assignment]
1155
1161
self ._has_string_data = False
1156
1162
self ._missing_values = False
1157
1163
self ._can_read_value_labels = False
@@ -1162,12 +1168,24 @@ def __init__(
1162
1168
self ._lines_read = 0
1163
1169
1164
1170
self ._native_byteorder = _set_endianness (sys .byteorder )
1171
+
1172
def _ensure_open(self) -> None:
    """
    Ensure the file has been opened and its header data read.

    Opening is deferred: ``_open_file`` is invoked only while
    ``self._path_or_buf`` is still ``None``, so repeated calls do nothing
    once the buffer has been populated.
    """
    # ``_path_or_buf`` starts out as None and is assigned by ``_open_file``.
    if self._path_or_buf is None:
        self._open_file()
1178
+
1179
+ def _open_file (self ) -> None :
1180
+ """
1181
+ Open the file (with compression options, etc.), and read header information.
1182
+ """
1165
1183
with get_handle (
1166
- path_or_buf ,
1184
+ self . _original_path_or_buf ,
1167
1185
"rb" ,
1168
- storage_options = storage_options ,
1186
+ storage_options = self . _storage_options ,
1169
1187
is_text = False ,
1170
- compression = compression ,
1188
+ compression = self . _compression ,
1171
1189
) as handles :
1172
1190
# Copy to BytesIO, and ensure no encoding
1173
1191
self ._path_or_buf = BytesIO (handles .handle .read ())
@@ -1534,6 +1552,7 @@ def _decode(self, s: bytes) -> str:
1534
1552
return s .decode ("latin-1" )
1535
1553
1536
1554
def _read_value_labels (self ) -> None :
1555
+ self ._ensure_open ()
1537
1556
if self ._value_labels_read :
1538
1557
# Don't read twice
1539
1558
return
@@ -1653,6 +1672,7 @@ def read(
1653
1672
columns : Sequence [str ] | None = None ,
1654
1673
order_categoricals : bool | None = None ,
1655
1674
) -> DataFrame :
1675
+ self ._ensure_open ()
1656
1676
# Handle empty file or chunk. If reading incrementally raise
1657
1677
# StopIteration. If reading the whole thing return an empty
1658
1678
# data frame.
@@ -1981,55 +2001,23 @@ def data_label(self) -> str:
1981
2001
"""
1982
2002
Return data label of Stata file.
1983
2003
"""
2004
+ self ._ensure_open ()
1984
2005
return self ._data_label
1985
2006
1986
- @property
1987
- def typlist (self ) -> list [int | str ]:
1988
- """
1989
- Return list of variable types.
1990
- """
1991
- return self ._typlist
1992
-
1993
- @property
1994
- def dtyplist (self ) -> list [str | np .dtype ]:
1995
- """
1996
- Return list of variable types.
1997
- """
1998
- return self ._dtyplist
1999
-
2000
- @property
2001
- def lbllist (self ) -> list [str ]:
2002
- """
2003
- Return list of variable labels.
2004
- """
2005
- return self ._lbllist
2006
-
2007
- @property
2008
- def varlist (self ) -> list [str ]:
2009
- """
2010
- Return list of variable names.
2011
- """
2012
- return self ._varlist
2013
-
2014
- @property
2015
- def fmtlist (self ) -> list [str ]:
2016
- """
2017
- Return list of variable formats.
2018
- """
2019
- return self ._fmtlist
2020
-
2021
2007
@property
def time_stamp(self) -> str:
    """
    Return time stamp of Stata file.

    The reader is opened on demand via ``_ensure_open`` before the stored
    ``_time_stamp`` value is returned.
    """
    self._ensure_open()
    return self._time_stamp
2027
2014
2028
2015
@property
def format_version(self) -> int:
    """
    Return format version of Stata file.

    The reader is opened on demand via ``_ensure_open`` before the stored
    ``_format_version`` value is returned.
    """
    self._ensure_open()
    return self._format_version
2034
2022
2035
2023
def variable_labels (self ) -> dict [str , str ]:
@@ -2040,6 +2028,7 @@ def variable_labels(self) -> dict[str, str]:
2040
2028
-------
2041
2029
dict
2042
2030
"""
2031
+ self ._ensure_open ()
2043
2032
return dict (zip (self ._varlist , self ._variable_labels ))
2044
2033
2045
2034
def value_labels (self ) -> dict [str , dict [float , str ]]:
0 commit comments