26
26
Optional ,
27
27
Sequence ,
28
28
Tuple ,
29
- Union
29
+ Union ,
30
30
)
31
31
import warnings
32
32
41
41
from pandas .core .dtypes .common import (
42
42
ensure_object ,
43
43
is_categorical_dtype ,
44
- is_datetime64_dtype
44
+ is_datetime64_dtype ,
45
45
)
46
46
47
47
from pandas import (
52
52
concat ,
53
53
isna ,
54
54
to_datetime ,
55
- to_timedelta
55
+ to_timedelta ,
56
56
)
57
57
from pandas .core .frame import DataFrame
58
58
from pandas .core .indexes .base import Index
63
63
get_filepath_or_buffer ,
64
64
get_handle ,
65
65
infer_compression ,
66
- stringify_path
66
+ stringify_path ,
67
67
)
68
68
69
69
_version_error = (
@@ -556,7 +556,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
556
556
(np .bool_ , np .int8 , np .int8 ),
557
557
(np .uint8 , np .int8 , np .int16 ),
558
558
(np .uint16 , np .int16 , np .int32 ),
559
- (np .uint32 , np .int32 , np .int64 )
559
+ (np .uint32 , np .int32 , np .int64 ),
560
560
)
561
561
562
562
float32_max = struct .unpack ("<f" , b"\xff \xff \xff \x7e " )[0 ]
@@ -647,7 +647,7 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
647
647
category = str (category )
648
648
warnings .warn (
649
649
value_label_mismatch_doc .format (catarray .name ),
650
- ValueLabelTypeMismatch
650
+ ValueLabelTypeMismatch ,
651
651
)
652
652
category = category .encode (encoding )
653
653
self .off .append (self .text_len )
@@ -794,7 +794,7 @@ class StataMissingValue:
794
794
"int16" : 32741 ,
795
795
"int32" : 2147483621 ,
796
796
"float32" : struct .unpack ("<f" , float32_base )[0 ],
797
- "float64" : struct .unpack ("<d" , float64_base )[0 ]
797
+ "float64" : struct .unpack ("<d" , float64_base )[0 ],
798
798
}
799
799
800
800
def __init__ (self , value : Union [int , float ]):
@@ -882,7 +882,7 @@ def __init__(self):
882
882
(252 , np .int16 ),
883
883
(253 , np .int32 ),
884
884
(254 , np .float32 ),
885
- (255 , np .float64 )
885
+ (255 , np .float64 ),
886
886
]
887
887
)
888
888
self .DTYPE_MAP_XML = dict (
@@ -892,7 +892,7 @@ def __init__(self):
892
892
(65527 , np .float32 ),
893
893
(65528 , np .int32 ),
894
894
(65529 , np .int16 ),
895
- (65530 , np .int8 )
895
+ (65530 , np .int8 ),
896
896
]
897
897
)
898
898
self .TYPE_MAP = list (range (251 )) + list ("bhlfd" )
@@ -904,7 +904,7 @@ def __init__(self):
904
904
(65527 , "f" ),
905
905
(65528 , "l" ),
906
906
(65529 , "h" ),
907
- (65530 , "b" )
907
+ (65530 , "b" ),
908
908
]
909
909
)
910
910
# NOTE: technically, some of these are wrong. there are more numbers
@@ -920,12 +920,12 @@ def __init__(self):
920
920
"l" : (- 2147483647 , 2147483620 ),
921
921
"f" : (
922
922
np .float32 (struct .unpack ("<f" , float32_min )[0 ]),
923
- np .float32 (struct .unpack ("<f" , float32_max )[0 ])
923
+ np .float32 (struct .unpack ("<f" , float32_max )[0 ]),
924
924
),
925
925
"d" : (
926
926
np .float64 (struct .unpack ("<d" , float64_min )[0 ]),
927
- np .float64 (struct .unpack ("<d" , float64_max )[0 ])
928
- )
927
+ np .float64 (struct .unpack ("<d" , float64_max )[0 ]),
928
+ ),
929
929
}
930
930
931
931
self .OLD_TYPE_MAPPING = {
@@ -945,15 +945,15 @@ def __init__(self):
945
945
"f" : np .float32 (struct .unpack ("<f" , b"\x00 \x00 \x00 \x7f " )[0 ]),
946
946
"d" : np .float64 (
947
947
struct .unpack ("<d" , b"\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \x7f " )[0 ]
948
- )
948
+ ),
949
949
}
950
950
self .NUMPY_TYPE_MAP = {
951
951
"b" : "i1" ,
952
952
"h" : "i2" ,
953
953
"l" : "i4" ,
954
954
"f" : "f4" ,
955
955
"d" : "f8" ,
956
- "Q" : "u8"
956
+ "Q" : "u8" ,
957
957
}
958
958
959
959
# Reserved words cannot be used as variable names
@@ -1017,7 +1017,7 @@ def __init__(self):
1017
1017
"_cons" ,
1018
1018
"_se" ,
1019
1019
"with" ,
1020
- "_n"
1020
+ "_n" ,
1021
1021
)
1022
1022
1023
1023
@@ -1035,7 +1035,7 @@ def __init__(
1035
1035
columns : Optional [Sequence [str ]] = None ,
1036
1036
order_categoricals : bool = True ,
1037
1037
chunksize : Optional [int ] = None ,
1038
- storage_options : StorageOptions = None
1038
+ storage_options : StorageOptions = None ,
1039
1039
):
1040
1040
super ().__init__ ()
1041
1041
self .col_sizes : List [int ] = []
@@ -1184,7 +1184,7 @@ def _read_new_header(self) -> None:
1184
1184
self .path_or_buf .seek (self ._seek_sortlist )
1185
1185
self .srtlist = struct .unpack (
1186
1186
self .byteorder + ("h" * (self .nvar + 1 )),
1187
- self .path_or_buf .read (2 * (self .nvar + 1 ))
1187
+ self .path_or_buf .read (2 * (self .nvar + 1 )),
1188
1188
)[:- 1 ]
1189
1189
1190
1190
self .path_or_buf .seek (self ._seek_formats )
@@ -1366,7 +1366,7 @@ def _read_old_header(self, first_char: bytes) -> None:
1366
1366
]
1367
1367
self .srtlist = struct .unpack (
1368
1368
self .byteorder + ("h" * (self .nvar + 1 )),
1369
- self .path_or_buf .read (2 * (self .nvar + 1 ))
1369
+ self .path_or_buf .read (2 * (self .nvar + 1 )),
1370
1370
)[:- 1 ]
1371
1371
1372
1372
self .fmtlist = self ._get_fmtlist ()
@@ -1556,7 +1556,7 @@ def read(
1556
1556
convert_missing : Optional [bool ] = None ,
1557
1557
preserve_dtypes : Optional [bool ] = None ,
1558
1558
columns : Optional [Sequence [str ]] = None ,
1559
- order_categoricals : Optional [bool ] = None
1559
+ order_categoricals : Optional [bool ] = None ,
1560
1560
) -> DataFrame :
1561
1561
# Handle empty file or chunk. If reading incrementally raise
1562
1562
# StopIteration. If reading the whole thing return an empty
@@ -1796,7 +1796,7 @@ def _do_convert_categoricals(
1796
1796
data : DataFrame ,
1797
1797
value_label_dict : Dict [str , Dict [Union [float , int ], str ]],
1798
1798
lbllist : Sequence [str ],
1799
- order_categoricals : bool
1799
+ order_categoricals : bool ,
1800
1800
) -> DataFrame :
1801
1801
"""
1802
1802
Converts categorical columns to Categorical type.
@@ -1910,7 +1910,7 @@ def read_stata(
1910
1910
order_categoricals : bool = True ,
1911
1911
chunksize : Optional [int ] = None ,
1912
1912
iterator : bool = False ,
1913
- storage_options : StorageOptions = None
1913
+ storage_options : StorageOptions = None ,
1914
1914
) -> Union [DataFrame , StataReader ]:
1915
1915
1916
1916
reader = StataReader (
@@ -1923,7 +1923,7 @@ def read_stata(
1923
1923
columns = columns ,
1924
1924
order_categoricals = order_categoricals ,
1925
1925
chunksize = chunksize ,
1926
- storage_options = storage_options
1926
+ storage_options = storage_options ,
1927
1927
)
1928
1928
1929
1929
if iterator or chunksize :
@@ -1939,7 +1939,7 @@ def read_stata(
1939
1939
def _open_file_binary_write (
1940
1940
fname : FilePathOrBuffer ,
1941
1941
compression : CompressionOptions ,
1942
- storage_options : StorageOptions = None
1942
+ storage_options : StorageOptions = None ,
1943
1943
) -> Tuple [BinaryIO , bool , CompressionOptions ]:
1944
1944
"""
1945
1945
Open a binary file or no-op if file-like.
@@ -2024,7 +2024,7 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype:
2024
2024
"th" ,
2025
2025
"%th" ,
2026
2026
"ty" ,
2027
- "%ty"
2027
+ "%ty" ,
2028
2028
]:
2029
2029
return np .float64 # Stata expects doubles for SIFs
2030
2030
else :
@@ -2231,7 +2231,7 @@ def __init__(
2231
2231
data_label : Optional [str ] = None ,
2232
2232
variable_labels : Optional [Dict [Label , str ]] = None ,
2233
2233
compression : Union [str , Mapping [str , str ], None ] = "infer" ,
2234
- storage_options : StorageOptions = None
2234
+ storage_options : StorageOptions = None ,
2235
2235
):
2236
2236
super ().__init__ ()
2237
2237
self ._convert_dates = {} if convert_dates is None else convert_dates
@@ -2563,7 +2563,7 @@ def write_file(self) -> None:
2563
2563
warnings .warn (
2564
2564
f"This save was not successful but { self ._fname } could not "
2565
2565
"be deleted. This file is not valid." ,
2566
- ResourceWarning
2566
+ ResourceWarning ,
2567
2567
)
2568
2568
raise exc
2569
2569
else :
@@ -2620,7 +2620,7 @@ def _write_value_labels(self) -> None:
2620
2620
def _write_header (
2621
2621
self ,
2622
2622
data_label : Optional [str ] = None ,
2623
- time_stamp : Optional [datetime .datetime ] = None
2623
+ time_stamp : Optional [datetime .datetime ] = None ,
2624
2624
) -> None :
2625
2625
byteorder = self ._byteorder
2626
2626
# ds_format - just use 114
@@ -2662,7 +2662,7 @@ def _write_header(
2662
2662
"Sep" ,
2663
2663
"Oct" ,
2664
2664
"Nov" ,
2665
- "Dec"
2665
+ "Dec" ,
2666
2666
]
2667
2667
month_lookup = {i + 1 : month for i , month in enumerate (months )}
2668
2668
ts = (
@@ -2863,7 +2863,7 @@ def __init__(
2863
2863
df : DataFrame ,
2864
2864
columns : Sequence [str ],
2865
2865
version : int = 117 ,
2866
- byteorder : Optional [str ] = None
2866
+ byteorder : Optional [str ] = None ,
2867
2867
):
2868
2868
if version not in (117 , 118 , 119 ):
2869
2869
raise ValueError ("Only dta versions 117, 118 and 119 supported" )
@@ -3114,7 +3114,7 @@ def __init__(
3114
3114
variable_labels : Optional [Dict [Label , str ]] = None ,
3115
3115
convert_strl : Optional [Sequence [Label ]] = None ,
3116
3116
compression : Union [str , Mapping [str , str ], None ] = "infer" ,
3117
- storage_options : StorageOptions = None
3117
+ storage_options : StorageOptions = None ,
3118
3118
):
3119
3119
# Copy to new list since convert_strl might be modified later
3120
3120
self ._convert_strl : List [Label ] = []
@@ -3131,7 +3131,7 @@ def __init__(
3131
3131
data_label = data_label ,
3132
3132
variable_labels = variable_labels ,
3133
3133
compression = compression ,
3134
- storage_options = storage_options
3134
+ storage_options = storage_options ,
3135
3135
)
3136
3136
self ._map : Dict [str , int ] = {}
3137
3137
self ._strl_blob = b""
@@ -3151,7 +3151,7 @@ def _update_map(self, tag: str) -> None:
3151
3151
def _write_header (
3152
3152
self ,
3153
3153
data_label : Optional [str ] = None ,
3154
- time_stamp : Optional [datetime .datetime ] = None
3154
+ time_stamp : Optional [datetime .datetime ] = None ,
3155
3155
) -> None :
3156
3156
"""Write the file header"""
3157
3157
byteorder = self ._byteorder
@@ -3193,7 +3193,7 @@ def _write_header(
3193
3193
"Sep" ,
3194
3194
"Oct" ,
3195
3195
"Nov" ,
3196
- "Dec"
3196
+ "Dec" ,
3197
3197
]
3198
3198
month_lookup = {i + 1 : month for i , month in enumerate (months )}
3199
3199
ts = (
@@ -3230,7 +3230,7 @@ def _write_map(self) -> None:
3230
3230
("strls" , 0 ),
3231
3231
("value_labels" , 0 ),
3232
3232
("stata_data_close" , 0 ),
3233
- ("end-of-file" , 0 )
3233
+ ("end-of-file" , 0 ),
3234
3234
)
3235
3235
)
3236
3236
# Move to start of map
@@ -3396,7 +3396,7 @@ def _set_formats_and_types(self, dtypes: Series) -> None:
3396
3396
dtype ,
3397
3397
self .data [col ],
3398
3398
dta_version = self ._dta_version ,
3399
- force_strl = force_strl
3399
+ force_strl = force_strl ,
3400
3400
)
3401
3401
self .fmtlist .append (fmt )
3402
3402
self .typlist .append (
@@ -3519,7 +3519,7 @@ def __init__(
3519
3519
convert_strl : Optional [Sequence [Label ]] = None ,
3520
3520
version : Optional [int ] = None ,
3521
3521
compression : Union [str , Mapping [str , str ], None ] = "infer" ,
3522
- storage_options : StorageOptions = None
3522
+ storage_options : StorageOptions = None ,
3523
3523
):
3524
3524
if version is None :
3525
3525
version = 118 if data .shape [1 ] <= 32767 else 119
@@ -3542,7 +3542,7 @@ def __init__(
3542
3542
variable_labels = variable_labels ,
3543
3543
convert_strl = convert_strl ,
3544
3544
compression = compression ,
3545
- storage_options = storage_options
3545
+ storage_options = storage_options ,
3546
3546
)
3547
3547
# Override version set in StataWriter117 init
3548
3548
self ._dta_version = version
0 commit comments