@@ -1144,6 +1144,9 @@ def __init__(
1144
1144
self ._preserve_dtypes = preserve_dtypes
1145
1145
self ._columns = columns
1146
1146
self ._order_categoricals = order_categoricals
1147
+ self ._original_path_or_buf = path_or_buf
1148
+ self ._compression = compression
1149
+ self ._storage_options = storage_options
1147
1150
self ._encoding = ""
1148
1151
self ._chunksize = chunksize
1149
1152
self ._using_iterator = False
@@ -1153,6 +1156,7 @@ def __init__(
1153
1156
raise ValueError ("chunksize must be a positive integer when set." )
1154
1157
1155
1158
# State variables for the file
1159
+ self ._path_or_buf = None
1156
1160
self ._has_string_data = False
1157
1161
self ._missing_values = False
1158
1162
self ._can_read_value_labels = False
@@ -1163,12 +1167,24 @@ def __init__(
1163
1167
self ._lines_read = 0
1164
1168
1165
1169
self ._native_byteorder = _set_endianness (sys .byteorder )
1170
+
1171
+ def _ensure_open (self ) -> None :
1172
+ """
1173
+ Ensure the file has been opened and its header data read.
1174
+ """
1175
+ if self ._path_or_buf is None :
1176
+ self ._open_file ()
1177
+
1178
+ def _open_file (self ) -> None :
1179
+ """
1180
+ Open the file (with compression options, etc.), and read header information.
1181
+ """
1166
1182
with get_handle (
1167
- path_or_buf ,
1183
+ self . _original_path_or_buf ,
1168
1184
"rb" ,
1169
- storage_options = storage_options ,
1185
+ storage_options = self . _storage_options ,
1170
1186
is_text = False ,
1171
- compression = compression ,
1187
+ compression = self . _compression ,
1172
1188
) as handles :
1173
1189
# Copy to BytesIO, and ensure no encoding
1174
1190
self ._path_or_buf = BytesIO (handles .handle .read ())
@@ -1536,6 +1552,7 @@ def _decode(self, s: bytes) -> str:
1536
1552
return s .decode ("latin-1" )
1537
1553
1538
1554
def _read_value_labels (self ) -> None :
1555
+ self ._ensure_open ()
1539
1556
if self ._value_labels_read :
1540
1557
# Don't read twice
1541
1558
return
@@ -1655,6 +1672,7 @@ def read(
1655
1672
columns : Sequence [str ] | None = None ,
1656
1673
order_categoricals : bool | None = None ,
1657
1674
) -> DataFrame :
1675
+ self ._ensure_open ()
1658
1676
# Handle empty file or chunk. If reading incrementally raise
1659
1677
# StopIteration. If reading the whole thing return an empty
1660
1678
# data frame.
@@ -1983,57 +2001,72 @@ def data_label(self) -> str:
1983
2001
"""
1984
2002
Return data label of Stata file.
1985
2003
"""
2004
+ self ._ensure_open ()
1986
2005
return self ._data_label
1987
2006
1988
2007
@property
1989
2008
def typlist (self ) -> list [int | str ]:
1990
2009
"""
1991
2010
Return list of variable types.
1992
2011
"""
2012
+ self ._ensure_open ()
1993
2013
return self ._typlist
1994
2014
1995
2015
@property
1996
2016
def dtyplist (self ) -> list [str | np .dtype ]:
1997
2017
"""
1998
2018
Return list of variable types.
1999
2019
"""
2020
+ self ._ensure_open ()
2000
2021
return self ._dtyplist
2001
2022
2002
2023
@property
def lbllist(self) -> list[str]:
    """
    Variable labels of the Stata file, as a list of strings.

    The file is opened first if that has not happened yet.
    """
    self._ensure_open()
    return self._lbllist
2008
2030
2009
2031
@property
def varlist(self) -> list[str]:
    """
    Variable names of the Stata file, as a list of strings.

    The file is opened first if that has not happened yet.
    """
    self._ensure_open()
    return self._varlist
2015
2038
2016
2039
@property
def fmtlist(self) -> list[str]:
    """
    Variable formats of the Stata file, as a list of strings.

    The file is opened first if that has not happened yet.
    """
    self._ensure_open()
    return self._fmtlist
2022
2046
2023
2047
@property
def time_stamp(self) -> str:
    """
    Time stamp of the Stata file.

    The file is opened first if that has not happened yet.
    """
    self._ensure_open()
    return self._time_stamp
2029
2054
2030
2055
@property
def format_version(self) -> int:
    """
    Format version of the Stata file.

    The file is opened first if that has not happened yet.
    """
    self._ensure_open()
    return self._format_version
2036
2062
2063
@property
def path_or_buf(self) -> IO[bytes]:
    """
    Return the file handle of the Stata file being read.

    The file is opened lazily, so ``_path_or_buf`` is ``None`` until
    something triggers the open. Opening here keeps the return value in
    line with the ``IO[bytes]`` annotation and with the other accessors,
    which all call ``_ensure_open`` before reading state.
    """
    self._ensure_open()
    return self._path_or_buf
2069
+
2037
2070
def variable_labels (self ) -> dict [str , str ]:
2038
2071
"""
2039
2072
Return a dict associating each variable name with corresponding label.
@@ -2042,6 +2075,7 @@ def variable_labels(self) -> dict[str, str]:
2042
2075
-------
2043
2076
dict
2044
2077
"""
2078
+ self ._ensure_open ()
2045
2079
return dict (zip (self ._varlist , self ._variable_labels ))
2046
2080
2047
2081
def value_labels (self ) -> dict [str , dict [float , str ]]:
0 commit comments