We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e25df6b, commit 000fcab (Copy full SHA for 000fcab)
pandas/tests/io/parser/test_encoding.py
@@ -275,17 +275,11 @@ def test_chunk_splits_multibyte_char(all_parsers):
275
@skip_pyarrow
276
def test_readcsv_memmap_utf8(all_parsers):
277
lines = []
278
- line_length = 128
279
- start_char = " "
280
- end_char = "\U00010080"
281
- # This for loop creates a list of 128-char strings
282
- # consisting of consecutive Unicode chars
283
- for lnum in range(ord(start_char), ord(end_char), line_length):
+ for lnum in range(0x20, 0x10080, 0x80):
284
line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n"
285
try:
286
line.encode("utf-8")
287
except UnicodeEncodeError:
288
- # Some 16-bit words are not valid Unicode chars and must be skipped
289
continue
290
lines.append(line)
291
parser = all_parsers
0 commit comments