@@ -469,7 +469,7 @@ class PossiblePrecisionLoss(Warning):
469
469
470
470
471
471
precision_loss_doc = """
472
- Column converted from %s to %s , and some data are outside of the lossless
472
+ Column converted from {0} to {1} , and some data are outside of the lossless
473
473
conversion range. This may result in a loss of precision in the saved data.
474
474
"""
475
475
@@ -543,7 +543,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
543
543
object in a DataFrame.
544
544
"""
545
545
ws = ""
546
- # original, if small, if large
546
+ # original, if small, if large
547
547
conversion_data = (
548
548
(np .bool_ , np .int8 , np .int8 ),
549
549
(np .uint8 , np .int8 , np .int16 ),
@@ -563,7 +563,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
563
563
dtype = c_data [1 ]
564
564
else :
565
565
dtype = c_data [2 ]
566
- if c_data [2 ] == np .float64 : # Warn if necessary
566
+ if c_data [2 ] == np .int64 : # Warn if necessary
567
567
if data [col ].max () >= 2 ** 53 :
568
568
ws = precision_loss_doc .format ("uint64" , "float64" )
569
569
@@ -627,12 +627,12 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
627
627
self .value_labels = list (zip (np .arange (len (categories )), categories ))
628
628
self .value_labels .sort (key = lambda x : x [0 ])
629
629
self .text_len = 0
630
- self .off : List [int ] = []
631
- self .val : List [int ] = []
632
630
self .txt : List [bytes ] = []
633
631
self .n = 0
634
632
635
633
# Compute lengths and setup lists of offsets and labels
634
+ offsets : List [int ] = []
635
+ values : List [int ] = []
636
636
for vl in self .value_labels :
637
637
category = vl [1 ]
638
638
if not isinstance (category , str ):
@@ -642,9 +642,9 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
642
642
ValueLabelTypeMismatch ,
643
643
)
644
644
category = category .encode (encoding )
645
- self . off .append (self .text_len )
645
+ offsets .append (self .text_len )
646
646
self .text_len += len (category ) + 1 # +1 for the padding
647
- self . val .append (vl [0 ])
647
+ values .append (vl [0 ])
648
648
self .txt .append (category )
649
649
self .n += 1
650
650
@@ -655,8 +655,8 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
655
655
)
656
656
657
657
# Ensure int32
658
- self .off = np .array (self . off , dtype = np .int32 )
659
- self .val = np .array (self . val , dtype = np .int32 )
658
+ self .off = np .array (offsets , dtype = np .int32 )
659
+ self .val = np .array (values , dtype = np .int32 )
660
660
661
661
# Total length
662
662
self .len = 4 + 4 + 4 * self .n + 4 * self .n + self .text_len
@@ -868,23 +868,23 @@ def __init__(self):
868
868
# with a label, but the underlying variable is -127 to 100
869
869
# we're going to drop the label and cast to int
870
870
self .DTYPE_MAP = dict (
871
- list (zip (range (1 , 245 ), ["a" + str (i ) for i in range (1 , 245 )]))
871
+ list (zip (range (1 , 245 ), [np . dtype ( "a" + str (i ) ) for i in range (1 , 245 )]))
872
872
+ [
873
- (251 , np .int8 ),
874
- (252 , np .int16 ),
875
- (253 , np .int32 ),
876
- (254 , np .float32 ),
877
- (255 , np .float64 ),
873
+ (251 , np .dtype ( np . int8 ) ),
874
+ (252 , np .dtype ( np . int16 ) ),
875
+ (253 , np .dtype ( np . int32 ) ),
876
+ (254 , np .dtype ( np . float32 ) ),
877
+ (255 , np .dtype ( np . float64 ) ),
878
878
]
879
879
)
880
880
self .DTYPE_MAP_XML = dict (
881
881
[
882
- (32768 , np .uint8 ), # Keys to GSO
883
- (65526 , np .float64 ),
884
- (65527 , np .float32 ),
885
- (65528 , np .int32 ),
886
- (65529 , np .int16 ),
887
- (65530 , np .int8 ),
882
+ (32768 , np .dtype ( np . uint8 ) ), # Keys to GSO
883
+ (65526 , np .dtype ( np . float64 ) ),
884
+ (65527 , np .dtype ( np . float32 ) ),
885
+ (65528 , np .dtype ( np . int32 ) ),
886
+ (65529 , np .dtype ( np . int16 ) ),
887
+ (65530 , np .dtype ( np . int8 ) ),
888
888
]
889
889
)
890
890
# error: Argument 1 to "list" has incompatible type "str";
@@ -1057,7 +1057,7 @@ def __init__(
1057
1057
self ._column_selector_set = False
1058
1058
self ._value_labels_read = False
1059
1059
self ._data_read = False
1060
- self ._dtype = None
1060
+ self ._dtype : Optional [ np . dtype ] = None
1061
1061
self ._lines_read = 0
1062
1062
1063
1063
self ._native_byteorder = _set_endianness (sys .byteorder )
@@ -1193,7 +1193,7 @@ def _read_new_header(self) -> None:
1193
1193
# Get data type information, works for versions 117-119.
1194
1194
def _get_dtypes (
1195
1195
self , seek_vartypes : int
1196
- ) -> Tuple [List [Union [int , str ]], List [Union [int , np .dtype ]]]:
1196
+ ) -> Tuple [List [Union [int , str ]], List [Union [str , np .dtype ]]]:
1197
1197
1198
1198
self .path_or_buf .seek (seek_vartypes )
1199
1199
raw_typlist = [
@@ -1518,11 +1518,8 @@ def _read_strls(self) -> None:
1518
1518
self .GSO [str (v_o )] = decoded_va
1519
1519
1520
1520
def __next__ (self ) -> DataFrame :
1521
- if self ._chunksize is None :
1522
- raise ValueError (
1523
- "chunksize must be set to a positive integer to use as an iterator."
1524
- )
1525
- return self .read (nrows = self ._chunksize or 1 )
1521
+ self ._chunksize = 1 if self ._chunksize is None else self ._chunksize
1522
+ return self .read (nrows = self ._chunksize )
1526
1523
1527
1524
def get_chunk (self , size : Optional [int ] = None ) -> DataFrame :
1528
1525
"""
@@ -1690,11 +1687,15 @@ def any_startswith(x: str) -> bool:
1690
1687
convert = False
1691
1688
for col in data :
1692
1689
dtype = data [col ].dtype
1693
- if dtype in (np .float16 , np .float32 ):
1694
- dtype = np .float64
1690
+ if dtype in (np .dtype ( np . float16 ) , np .dtype ( np . float32 ) ):
1691
+ dtype = np .dtype ( np . float64 )
1695
1692
convert = True
1696
- elif dtype in (np .int8 , np .int16 , np .int32 ):
1697
- dtype = np .int64
1693
+ elif dtype in (
1694
+ np .dtype (np .int8 ),
1695
+ np .dtype (np .int16 ),
1696
+ np .dtype (np .int32 ),
1697
+ ):
1698
+ dtype = np .dtype (np .int64 )
1698
1699
convert = True
1699
1700
retyped_data .append ((col , data [col ].astype (dtype )))
1700
1701
if convert :
@@ -1807,7 +1808,7 @@ def _do_convert_categoricals(
1807
1808
column = data [col ]
1808
1809
key_matches = column .isin (keys )
1809
1810
if self ._chunksize is not None and key_matches .all ():
1810
- initial_categories = keys
1811
+ initial_categories : Optional [ np . ndarray ] = keys
1811
1812
# If all categories are in the keys and we are iterating,
1812
1813
# use the same keys for all chunks. If some are missing
1813
1814
# value labels, then we will fall back to the categories
@@ -2024,7 +2025,7 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype:
2024
2025
"ty" ,
2025
2026
"%ty" ,
2026
2027
]:
2027
- return np .float64 # Stata expects doubles for SIFs
2028
+ return np .dtype ( np . float64 ) # Stata expects doubles for SIFs
2028
2029
else :
2029
2030
raise NotImplementedError (f"Format { fmt } not implemented" )
2030
2031
0 commit comments