|
4 | 4 | import os
|
5 | 5 | import csv
|
6 | 6 | import codecs
|
| 7 | +import mmap |
7 | 8 | import zipfile
|
8 | 9 | from contextlib import contextmanager, closing
|
9 | 10 |
|
@@ -276,7 +277,7 @@ def ZipFile(*args, **kwargs):
|
276 | 277 | ZipFile = zipfile.ZipFile
|
277 | 278 |
|
278 | 279 |
|
279 |
| -def _get_handle(path, mode, encoding=None, compression=None): |
| 280 | +def _get_handle(path, mode, encoding=None, compression=None, memory_map=False): |
280 | 281 | """Gets file handle for given path and mode.
|
281 | 282 | """
|
282 | 283 | if compression is not None:
|
@@ -324,9 +325,55 @@ def _get_handle(path, mode, encoding=None, compression=None):
|
324 | 325 | else:
|
325 | 326 | f = open(path, mode)
|
326 | 327 |
|
| 328 | + if memory_map and hasattr(f, 'fileno'): |
| 329 | + try: |
| 330 | + f = MMapWrapper(f) |
| 331 | + except Exception: |
| 332 | + # we catch any errors that may have occurred |
| 333 | + # because that is consistent with the lower-level |
| 334 | + # functionality of the C engine (pd.read_csv), so |
| 335 | + # leave the file handler as is then |
| 336 | + pass |
| 337 | + |
327 | 338 | return f
|
328 | 339 |
|
329 | 340 |
|
class MMapWrapper(BaseIterator):
    """
    Wrapper for Python's mmap class so that it can be properly read in
    by Python's csv.reader class.

    Each call to ``__next__`` returns the next line of the mapped file.
    Any attribute not defined on the wrapper itself is looked up on the
    underlying mmap object.

    Parameters
    ----------
    f : file object
        File object to be mapped onto memory. Must support the 'fileno'
        method or have an equivalent attribute

    """

    def __init__(self, f):
        # a length of 0 asks mmap to map the whole file; read-only access
        self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    def __getattr__(self, name):
        # __getattr__ is only invoked when normal lookup fails, so being
        # asked for 'mmap' means the mapping was never set (e.g. __init__
        # raised, or the instance was created without calling __init__).
        # Reading self.mmap here would re-enter this method and recurse
        # without bound, so report the attribute as missing instead.
        if name == 'mmap':
            raise AttributeError(name)
        return getattr(self.mmap, name)

    def __next__(self):
        newline = self.mmap.readline()

        # readline returns bytes, not str, in Python 3,
        # but Python's CSV reader expects str, so convert
        # the output to str before continuing
        if compat.PY3:
            newline = compat.bytes_to_str(newline)

        # mmap doesn't raise if reading past the allocated
        # data but instead returns an empty string, so raise
        # if that is returned
        if newline == '':
            raise StopIteration
        return newline
| 376 | + |
330 | 377 | class UTF8Recoder(BaseIterator):
|
331 | 378 |
|
332 | 379 | """
|
|
0 commit comments