File tree 3 files changed +13
-5
lines changed
3 files changed +13
-5
lines changed Original file line number Diff line number Diff line change 828
828
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
829
829
- :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``.
830
830
- Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
831
- -
831
+ - Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
832
832
833
833
Period
834
834
^^^^^^
Original file line number Diff line number Diff line change @@ -668,7 +668,9 @@ class StataValueLabel:
668
668
Encoding to use for value labels.
669
669
"""
670
670
671
- def __init__ (self , catarray : Series , encoding : str = "latin-1" ) -> None :
671
+ def __init__ (
672
+ self , catarray : Series , encoding : Literal ["latin-1" , "utf-8" ] = "latin-1"
673
+ ) -> None :
672
674
673
675
if encoding not in ("latin-1" , "utf-8" ):
674
676
raise ValueError ("Only latin-1 and utf-8 are supported." )
@@ -2250,7 +2252,7 @@ class StataWriter(StataParser):
2250
2252
"""
2251
2253
2252
2254
_max_string_length = 244
2253
- _encoding = "latin-1"
2255
+ _encoding : Literal [ "latin-1" , "utf-8" ] = "latin-1"
2254
2256
2255
2257
def __init__ (
2256
2258
self ,
@@ -2331,7 +2333,7 @@ def _prepare_non_cat_value_labels(
2331
2333
f"Can't create value labels for { labname } , value labels "
2332
2334
"can only be applied to numeric columns."
2333
2335
)
2334
- svl = StataNonCatValueLabel (colname , labels )
2336
+ svl = StataNonCatValueLabel (colname , labels , self . _encoding )
2335
2337
non_cat_value_labels .append (svl )
2336
2338
return non_cat_value_labels
2337
2339
@@ -3575,7 +3577,7 @@ class StataWriterUTF8(StataWriter117):
3575
3577
>>> writer.write_file()
3576
3578
"""
3577
3579
3578
- _encoding = "utf-8"
3580
+ _encoding : Literal [ "utf-8" ] = "utf-8"
3579
3581
3580
3582
def __init__ (
3581
3583
self ,
Original file line number Diff line number Diff line change @@ -1797,6 +1797,7 @@ def test_utf8_writer(self, version):
1797
1797
"ᴐᴬᵀ" : "" ,
1798
1798
}
1799
1799
data_label = "ᴅaᵀa-label"
1800
+ value_labels = {"β" : {1 : "label" , 2 : "æøå" , 3 : "ŋot valid latin-1" }}
1800
1801
data ["β" ] = data ["β" ].astype (np .int32 )
1801
1802
with tm .ensure_clean () as path :
1802
1803
writer = StataWriterUTF8 (
@@ -1807,11 +1808,16 @@ def test_utf8_writer(self, version):
1807
1808
variable_labels = variable_labels ,
1808
1809
write_index = False ,
1809
1810
version = version ,
1811
+ value_labels = value_labels ,
1810
1812
)
1811
1813
writer .write_file ()
1812
1814
reread_encoded = read_stata (path )
1813
1815
# Missing is intentionally converted to empty strl
1814
1816
data ["strls" ] = data ["strls" ].fillna ("" )
1817
+ # Variable with value labels is reread as categorical
1818
+ data ["β" ] = (
1819
+ data ["β" ].replace (value_labels ["β" ]).astype ("category" ).cat .as_ordered ()
1820
+ )
1815
1821
tm .assert_frame_equal (data , reread_encoded )
1816
1822
reader = StataReader (path )
1817
1823
assert reader .data_label == data_label
You can’t perform that action at this time.
0 commit comments