Skip to content

Commit e26e3ee

Browse files
authored
BUG: Remove incorrect check on value label length (pandas-dev#60156)
* BUG: Remove incorrect check on value label length. Remove the 32,000 limit check on value labels, since this limit applies to the number of variables, not the length of the value labels. Closes pandas-dev#60107 * TST: Remove incorrect test. Remove the test of the error that was being incorrectly raised.
1 parent e7bb845 commit e26e3ee

File tree

3 files changed

+13
-22
lines changed

3 files changed

+13
-22
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,7 @@ I/O
687687
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
688688
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
689689
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
690+
- Bug in :meth:`DataFrame.to_stata` that incorrectly raised ``ValueError`` when the value labels for a single variable had a combined length of more than 32,000 characters. (:issue:`60107`)
690691
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
691692
- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
692693
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)

pandas/io/stata.py

-6
Original file line numberDiff line numberDiff line change
@@ -691,12 +691,6 @@ def _prepare_value_labels(self) -> None:
691691
self.txt.append(category)
692692
self.n += 1
693693

694-
if self.text_len > 32000:
695-
raise ValueError(
696-
"Stata value labels for a single variable must "
697-
"have a combined length less than 32,000 characters."
698-
)
699-
700694
# Ensure int32
701695
self.off = np.array(offsets, dtype=np.int32)
702696
self.val = np.array(values, dtype=np.int32)

pandas/tests/io/test_stata.py

+12-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
from datetime import datetime
44
import gzip
55
import io
6+
import itertools
67
import os
8+
import string
79
import struct
810
import tarfile
911
import zipfile
@@ -1163,28 +1165,13 @@ def test_categorical_writing(self, version, temp_file):
11631165

11641166
def test_categorical_warnings_and_errors(self, temp_file):
11651167
# Warning for non-string labels
1166-
# Error for labels too long
1167-
original = DataFrame.from_records(
1168-
[["a" * 10000], ["b" * 10000], ["c" * 10000], ["d" * 10000]],
1169-
columns=["Too_long"],
1170-
)
1171-
1172-
original = original.astype("category")
1173-
path = temp_file
1174-
msg = (
1175-
"Stata value labels for a single variable must have "
1176-
r"a combined length less than 32,000 characters\."
1177-
)
1178-
with pytest.raises(ValueError, match=msg):
1179-
original.to_stata(path)
1180-
11811168
original = DataFrame.from_records(
11821169
[["a"], ["b"], ["c"], ["d"], [1]], columns=["Too_long"]
11831170
).astype("category")
11841171

11851172
msg = "data file created has not lost information due to duplicate labels"
11861173
with tm.assert_produces_warning(ValueLabelTypeMismatch, match=msg):
1187-
original.to_stata(path)
1174+
original.to_stata(temp_file)
11881175
# should get a warning for mixed content
11891176

11901177
@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
@@ -2592,3 +2579,12 @@ def test_empty_frame(temp_file):
25922579
df3 = read_stata(path, columns=["a"])
25932580
assert "b" not in df3
25942581
tm.assert_series_equal(df3.dtypes, dtypes.loc[["a"]])
2582+
2583+
2584+
@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
2585+
def test_many_strl(temp_file, version):
2586+
n = 65534
2587+
df = DataFrame(np.arange(n), columns=["col"])
2588+
lbls = ["".join(v) for v in itertools.product(*([string.ascii_letters] * 3))]
2589+
value_labels = {"col": {i: lbls[i] for i in range(n)}}
2590+
df.to_stata(temp_file, value_labels=value_labels, version=version)

0 commit comments

Comments
 (0)