Skip to content

Commit 6c31cab

Browse files
committed
ENH: memoize objects when reading from file to reduce memory footprint
1 parent d55856c commit 6c31cab

File tree

2 files changed: 8 additions and 2 deletions.

2 files changed: 8 additions and 2 deletions.

pandas/io/parsers.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
219219
self.chunksize = chunksize
220220
self.passed_names = names is not None
221221
self.encoding = encoding
222-
222+
223223

224224
if com.is_integer(skiprows):
225225
skiprows = range(skiprows)
@@ -281,7 +281,7 @@ def _make_reader(self, f):
281281
dialect=dia)))
282282

283283
if self.encoding is not None:
284-
reader = com.UnicodeReader(f, dialect=dia,
284+
reader = com.UnicodeReader(f, dialect=dia,
285285
encoding=self.encoding)
286286
else:
287287
reader = csv.reader(f, dialect=dia)

pandas/src/inference.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ def sanitize_objects(ndarray[object] values, set na_values):
341341
Py_ssize_t i, n
342342
object val, onan
343343
Py_ssize_t na_count = 0
344+
dict memo = {}
344345

345346
n = len(values)
346347
onan = np.nan
@@ -350,6 +351,11 @@ def sanitize_objects(ndarray[object] values, set na_values):
350351
if val == '' or val in na_values:
351352
values[i] = onan
352353
na_count += 1
354+
elif val in memo:
355+
values[i] = memo[val]
356+
else:
357+
memo[val] = val
358+
353359
return na_count
354360

355361
def maybe_convert_bool(ndarray[object] arr):

0 commit comments

Comments
 (0)