@@ -1114,6 +1114,8 @@ def __init__(self) -> None:
1114
1114
class StataReader (StataParser , abc .Iterator ):
1115
1115
__doc__ = _stata_reader_doc
1116
1116
1117
+ _path_or_buf : IO [bytes ] # Will be assigned by `_open_file`.
1118
+
1117
1119
def __init__ (
1118
1120
self ,
1119
1121
path_or_buf : FilePath | ReadBuffer [bytes ],
@@ -1140,6 +1142,9 @@ def __init__(
1140
1142
self ._preserve_dtypes = preserve_dtypes
1141
1143
self ._columns = columns
1142
1144
self ._order_categoricals = order_categoricals
1145
+ self ._original_path_or_buf = path_or_buf
1146
+ self ._compression = compression
1147
+ self ._storage_options = storage_options
1143
1148
self ._encoding = ""
1144
1149
self ._chunksize = chunksize
1145
1150
self ._using_iterator = False
@@ -1149,6 +1154,7 @@ def __init__(
1149
1154
raise ValueError ("chunksize must be a positive integer when set." )
1150
1155
1151
1156
# State variables for the file
1157
+ self ._close_file : Callable [[], None ] | None = None
1152
1158
self ._has_string_data = False
1153
1159
self ._missing_values = False
1154
1160
self ._can_read_value_labels = False
@@ -1159,12 +1165,24 @@ def __init__(
1159
1165
self ._lines_read = 0
1160
1166
1161
1167
self ._native_byteorder = _set_endianness (sys .byteorder )
1168
+
1169
+ def _ensure_open (self ) -> None :
1170
+ """
1171
+ Ensure the file has been opened and its header data read.
1172
+ """
1173
+ if not hasattr (self , "_path_or_buf" ):
1174
+ self ._open_file ()
1175
+
1176
+ def _open_file (self ) -> None :
1177
+ """
1178
+ Open the file (with compression options, etc.), and read header information.
1179
+ """
1162
1180
with get_handle (
1163
- path_or_buf ,
1181
+ self . _original_path_or_buf ,
1164
1182
"rb" ,
1165
- storage_options = storage_options ,
1183
+ storage_options = self . _storage_options ,
1166
1184
is_text = False ,
1167
- compression = compression ,
1185
+ compression = self . _compression ,
1168
1186
) as handles :
1169
1187
# Copy to BytesIO, and ensure no encoding
1170
1188
self ._path_or_buf = BytesIO (handles .handle .read ())
@@ -1530,6 +1548,7 @@ def _decode(self, s: bytes) -> str:
1530
1548
return s .decode ("latin-1" )
1531
1549
1532
1550
def _read_value_labels (self ) -> None :
1551
+ self ._ensure_open ()
1533
1552
if self ._value_labels_read :
1534
1553
# Don't read twice
1535
1554
return
@@ -1649,6 +1668,7 @@ def read(
1649
1668
columns : Sequence [str ] | None = None ,
1650
1669
order_categoricals : bool | None = None ,
1651
1670
) -> DataFrame :
1671
+ self ._ensure_open ()
1652
1672
# Handle empty file or chunk. If reading incrementally raise
1653
1673
# StopIteration. If reading the whole thing return an empty
1654
1674
# data frame.
@@ -1976,48 +1996,15 @@ def data_label(self) -> str:
1976
1996
"""
1977
1997
Return data label of Stata file.
1978
1998
"""
1999
+ self ._ensure_open ()
1979
2000
return self ._data_label
1980
2001
1981
- @property
1982
- def typlist (self ) -> list [int | str ]:
1983
- """
1984
- Return list of variable types.
1985
- """
1986
- return self ._typlist
1987
-
1988
- @property
1989
- def dtyplist (self ) -> list [str | np .dtype ]:
1990
- """
1991
- Return list of variable types.
1992
- """
1993
- return self ._dtyplist
1994
-
1995
- @property
1996
- def lbllist (self ) -> list [str ]:
1997
- """
1998
- Return list of variable labels.
1999
- """
2000
- return self ._lbllist
2001
-
2002
- @property
2003
- def varlist (self ) -> list [str ]:
2004
- """
2005
- Return list of variable names.
2006
- """
2007
- return self ._varlist
2008
-
2009
- @property
2010
- def fmtlist (self ) -> list [str ]:
2011
- """
2012
- Return list of variable formats.
2013
- """
2014
- return self ._fmtlist
2015
-
2016
2002
@property
2017
2003
def time_stamp (self ) -> str :
2018
2004
"""
2019
2005
Return time stamp of Stata file.
2020
2006
"""
2007
+ self ._ensure_open ()
2021
2008
return self ._time_stamp
2022
2009
2023
2010
def variable_labels (self ) -> dict [str , str ]:
@@ -2028,6 +2015,7 @@ def variable_labels(self) -> dict[str, str]:
2028
2015
-------
2029
2016
dict
2030
2017
"""
2018
+ self ._ensure_open ()
2031
2019
return dict (zip (self ._varlist , self ._variable_labels ))
2032
2020
2033
2021
def value_labels (self ) -> dict [str , dict [float , str ]]:
0 commit comments