Skip to content

Commit 05f85a4

Browse files
authored
BUG: StataWriterUTF8 removing some valid characters in column names (#47276) (#47297)
1 parent 0f1bd81 commit 05f85a4

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,7 @@ I/O
847847
- Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`)
848848
- Bug in :func:`read_sas` that returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
849849
- Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
850+
- Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`)
850851

851852
Period
852853
^^^^^^

pandas/io/stata.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -3646,12 +3646,16 @@ def _validate_variable_name(self, name: str) -> str:
36463646
# High code points appear to be acceptable
36473647
for c in name:
36483648
if (
3649-
ord(c) < 128
3650-
and (c < "A" or c > "Z")
3651-
and (c < "a" or c > "z")
3652-
and (c < "0" or c > "9")
3653-
and c != "_"
3654-
) or 128 <= ord(c) < 256:
3649+
(
3650+
ord(c) < 128
3651+
and (c < "A" or c > "Z")
3652+
and (c < "a" or c > "z")
3653+
and (c < "0" or c > "9")
3654+
and c != "_"
3655+
)
3656+
or 128 <= ord(c) < 192
3657+
or c in {"×", "÷"}
3658+
):
36553659
name = name.replace(c, "_")
36563660

36573661
return name

pandas/tests/io/test_stata.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1786,11 +1786,11 @@ def test_utf8_writer(self, version):
17861786
[2.0, 2, "ᴮ", ""],
17871787
[3.0, 3, "ᴰ", None],
17881788
],
1789-
columns=["a", "β", "ĉ", "strls"],
1789+
columns=["Å", "β", "ĉ", "strls"],
17901790
)
17911791
data["ᴐᴬᵀ"] = cat
17921792
variable_labels = {
1793-
"a": "apple",
1793+
"Å": "apple",
17941794
"β": "ᵈᵉᵊ",
17951795
"ĉ": "ᴎტჄႲႳႴႶႺ",
17961796
"strls": "Long Strings",

0 commit comments

Comments
 (0)