@@ -1143,6 +1143,9 @@ def __init__(
1143
1143
self ._preserve_dtypes = preserve_dtypes
1144
1144
self ._columns = columns
1145
1145
self ._order_categoricals = order_categoricals
1146
+ self ._original_path_or_buf = path_or_buf
1147
+ self ._compression = compression
1148
+ self ._storage_options = storage_options
1146
1149
self ._encoding = ""
1147
1150
self ._chunksize = chunksize
1148
1151
self ._using_iterator = False
@@ -1152,6 +1155,9 @@ def __init__(
1152
1155
raise ValueError ("chunksize must be a positive integer when set." )
1153
1156
1154
1157
# State variables for the file
1158
+ # NB: _path_or_buf is mistyped on purpose, since the alternative is to placate
1159
+ # mypy by having an assert before every read.
1160
+ self ._path_or_buf : IO [bytes ] = None # type: ignore[assignment]
1155
1161
self ._has_string_data = False
1156
1162
self ._missing_values = False
1157
1163
self ._can_read_value_labels = False
@@ -1162,12 +1168,24 @@ def __init__(
1162
1168
self ._lines_read = 0
1163
1169
1164
1170
self ._native_byteorder = _set_endianness (sys .byteorder )
1171
+
1172
def _ensure_open(self) -> None:
    """
    Open the file and read its header data unless that already happened.

    ``_path_or_buf`` holds ``None`` until the file has been opened, so a
    non-``None`` value means there is nothing left to do.
    """
    if self._path_or_buf is not None:
        return
    self._open_file()
1178
+
1179
+ def _open_file (self ) -> None :
1180
+ """
1181
+ Open the file (with compression options, etc.), and read header information.
1182
+ """
1165
1183
with get_handle (
1166
- path_or_buf ,
1184
+ self . _original_path_or_buf ,
1167
1185
"rb" ,
1168
- storage_options = storage_options ,
1186
+ storage_options = self . _storage_options ,
1169
1187
is_text = False ,
1170
- compression = compression ,
1188
+ compression = self . _compression ,
1171
1189
) as handles :
1172
1190
# Copy to BytesIO, and ensure no encoding
1173
1191
self ._path_or_buf = BytesIO (handles .handle .read ())
@@ -1534,6 +1552,7 @@ def _decode(self, s: bytes) -> str:
1534
1552
return s .decode ("latin-1" )
1535
1553
1536
1554
def _read_value_labels (self ) -> None :
1555
+ self ._ensure_open ()
1537
1556
if self ._value_labels_read :
1538
1557
# Don't read twice
1539
1558
return
@@ -1653,6 +1672,7 @@ def read(
1653
1672
columns : Sequence [str ] | None = None ,
1654
1673
order_categoricals : bool | None = None ,
1655
1674
) -> DataFrame :
1675
+ self ._ensure_open ()
1656
1676
# Handle empty file or chunk. If reading incrementally raise
1657
1677
# StopIteration. If reading the whole thing return an empty
1658
1678
# data frame.
@@ -1981,57 +2001,72 @@ def data_label(self) -> str:
1981
2001
"""
1982
2002
Return data label of Stata file.
1983
2003
"""
2004
+ self ._ensure_open ()
1984
2005
return self ._data_label
1985
2006
1986
2007
@property
1987
2008
def typlist (self ) -> list [int | str ]:
1988
2009
"""
1989
2010
Return list of variable types.
1990
2011
"""
2012
+ self ._ensure_open ()
1991
2013
return self ._typlist
1992
2014
1993
2015
@property
1994
2016
def dtyplist (self ) -> list [str | np .dtype ]:
1995
2017
"""
1996
2018
Return list of variable types.
1997
2019
"""
2020
+ self ._ensure_open ()
1998
2021
return self ._dtyplist
1999
2022
2000
2023
@property
def lbllist(self) -> list[str]:
    """
    Return list of variable labels.

    Accessing this property opens the file first when it is still unopened.
    """
    self._ensure_open()
    labels = self._lbllist
    return labels
2006
2030
2007
2031
@property
def varlist(self) -> list[str]:
    """
    Return list of variable names.

    Accessing this property opens the file first when it is still unopened.
    """
    self._ensure_open()
    names = self._varlist
    return names
2013
2038
2014
2039
@property
def fmtlist(self) -> list[str]:
    """
    Return list of variable formats.

    Accessing this property opens the file first when it is still unopened.
    """
    self._ensure_open()
    formats = self._fmtlist
    return formats
2020
2046
2021
2047
@property
def time_stamp(self) -> str:
    """
    Return time stamp of Stata file.

    Accessing this property opens the file first when it is still unopened.
    """
    self._ensure_open()
    stamp = self._time_stamp
    return stamp
2027
2054
2028
2055
@property
def format_version(self) -> int:
    """
    Return format version of Stata file.

    Accessing this property opens the file first when it is still unopened.
    """
    self._ensure_open()
    version = self._format_version
    return version
2034
2062
2063
@property
def path_or_buf(self) -> IO[bytes]:
    """
    Return the file handle of the Stata file being read.

    The handle is created lazily: ``_path_or_buf`` is ``None`` until the
    file has been opened.  Like the other header accessors, open the file
    first so callers always receive a real binary handle, matching the
    declared return type, instead of the ``None`` placeholder.
    """
    self._ensure_open()
    return self._path_or_buf
2069
+
2035
2070
def variable_labels (self ) -> dict [str , str ]:
2036
2071
"""
2037
2072
Return a dict associating each variable name with corresponding label.
@@ -2040,6 +2075,7 @@ def variable_labels(self) -> dict[str, str]:
2040
2075
-------
2041
2076
dict
2042
2077
"""
2078
+ self ._ensure_open ()
2043
2079
return dict (zip (self ._varlist , self ._variable_labels ))
2044
2080
2045
2081
def value_labels (self ) -> dict [str , dict [float , str ]]:
0 commit comments