|
3 | 3 | from datetime import datetime
|
4 | 4 | import gzip
|
5 | 5 | import io
|
| 6 | +import itertools |
6 | 7 | import os
|
| 8 | +import string |
7 | 9 | import struct
|
8 | 10 | import tarfile
|
9 | 11 | import zipfile
|
@@ -1163,28 +1165,13 @@ def test_categorical_writing(self, version, temp_file):
|
1163 | 1165 |
|
def test_categorical_warnings_and_errors(self, temp_file):
    """Check the warning raised for non-string category labels."""
    # Four string values plus the integer 1 give the categorical column
    # labels of mixed type.
    records = [["a"], ["b"], ["c"], ["d"], [1]]
    frame = DataFrame.from_records(records, columns=["Too_long"])
    mixed = frame.astype("category")

    expected = "data file created has not lost information due to duplicate labels"
    with tm.assert_produces_warning(ValueLabelTypeMismatch, match=expected):
        # Mixed content in the labels should produce the warning on write.
        mixed.to_stata(temp_file)
|
1189 | 1176 |
|
1190 | 1177 | @pytest.mark.parametrize("version", [114, 117, 118, 119, None])
|
@@ -2592,3 +2579,12 @@ def test_empty_frame(temp_file):
|
2592 | 2579 | df3 = read_stata(path, columns=["a"])
|
2593 | 2580 | assert "b" not in df3
|
2594 | 2581 | tm.assert_series_equal(df3.dtypes, dtypes.loc[["a"]])
|
| 2582 | + |
| 2583 | + |
@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
def test_many_strl(temp_file, version):
    """Write a column carrying 65534 distinct value labels.

    There is no explicit assertion: the test passes when ``to_stata``
    completes without raising for each parametrized ``version``.
    """
    n = 65534
    frame = DataFrame(np.arange(n), columns=["col"])
    # Three-letter combinations of ASCII letters yield unique labels; zip
    # stops after the first ``n`` of them, one per integer value.
    labels = map("".join, itertools.product(string.ascii_letters, repeat=3))
    value_labels = {"col": dict(zip(range(n), labels))}
    frame.to_stata(temp_file, value_labels=value_labels, version=version)
0 commit comments