Skip to content

Commit 000fcab

Browse files
committed
PERF: read_csv with memory_map=True when file encoding is UTF-8 (#43787)
1 parent e25df6b commit 000fcab

File tree

1 file changed

+1
-7
lines changed

1 file changed

+1
-7
lines changed

pandas/tests/io/parser/test_encoding.py

+1 -7
Original file line number | Diff line number | Diff line change
@@ -275,17 +275,11 @@ def test_chunk_splits_multibyte_char(all_parsers):
275275
@skip_pyarrow
276276
def test_readcsv_memmap_utf8(all_parsers):
277277
lines = []
278-
line_length = 128
279-
start_char = " "
280-
end_char = "\U00010080"
281-
# This for loop creates a list of 128-char strings
282-
# consisting of consecutive Unicode chars
283-
for lnum in range(ord(start_char), ord(end_char), line_length):
278+
for lnum in range(0x20, 0x10080, 0x80):
284279
line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n"
285280
try:
286281
line.encode("utf-8")
287282
except UnicodeEncodeError:
288-
# Some 16-bit words are not valid Unicode chars and must be skipped
289283
continue
290284
lines.append(line)
291285
parser = all_parsers

0 commit comments

Comments
 (0)