Skip to content

Commit 7d04391

Browse files
mattipjreback
authored and committed
COMPAT: free parser memory at close() for non-refcnt gc
Relying on __dealloc__ to clean up malloc()'ed memory can lead to a perceived "leak" on PyPy, since the garbage collector will not necessarily collect the object as soon as its refcnt reaches 0. Instead, pre-emptively release memory when close() is called. The code still maintains backward compatibility for the case where close() is never called. Author: mattip <[email protected]>. Closes pandas-dev#15665 from mattip/pypy-compat and squashes the following commits: eaf50fe [mattip] COMPAT: free parser memory at close() for non-refcnt gc
1 parent 56b5a30 commit 7d04391

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

pandas/_libs/src/parser/tokenizer.c

+4
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,7 @@ int parser_cleanup(parser_t *self) {
162162
if (self->cb_cleanup(self->source) < 0) {
163163
status = -1;
164164
}
165+
self->cb_cleanup = NULL;
165166
}
166167

167168
return status;
@@ -239,6 +240,9 @@ int parser_init(parser_t *self) {
239240
void parser_free(parser_t *self) {
240241
// opposite of parser_init
241242
parser_cleanup(self);
243+
}
244+
245+
void parser_del(parser_t *self) {
242246
free(self);
243247
}
244248

pandas/_libs/src/parser/tokenizer.h

+2
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,8 @@ int parser_set_skipfirstnrows(parser_t *self, int64_t nrows);
243243

244244
void parser_free(parser_t *self);
245245

246+
void parser_del(parser_t *self);
247+
246248
void parser_set_default_options(parser_t *self);
247249

248250
void debug_print_parser(parser_t *self);

pandas/io/parsers.pyx

+16-2
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,7 @@ cdef extern from "parser/tokenizer.h":
214214

215215
int parser_init(parser_t *self) nogil
216216
void parser_free(parser_t *self) nogil
217+
void parser_del(parser_t *self) nogil
217218
int parser_add_skiprow(parser_t *self, int64_t row)
218219

219220
int parser_set_skipfirstnrows(parser_t *self, int64_t nrows)
@@ -573,8 +574,13 @@ cdef class TextReader:
573574

574575
def __dealloc__(self):
575576
parser_free(self.parser)
576-
kh_destroy_str(self.true_set)
577-
kh_destroy_str(self.false_set)
577+
if self.true_set:
578+
kh_destroy_str(self.true_set)
579+
self.true_set = NULL
580+
if self.false_set:
581+
kh_destroy_str(self.false_set)
582+
self.false_set = NULL
583+
parser_del(self.parser)
578584

579585
def close(self):
580586
# we need to properly close an open derived
@@ -584,6 +590,14 @@ cdef class TextReader:
584590
self.handle.close()
585591
except:
586592
pass
593+
# also preemptively free all allocated memory
594+
parser_free(self.parser)
595+
if self.true_set:
596+
kh_destroy_str(self.true_set)
597+
self.true_set = NULL
598+
if self.false_set:
599+
kh_destroy_str(self.false_set)
600+
self.false_set = NULL
587601

588602
def set_error_bad_lines(self, int status):
589603
self.parser.error_bad_lines = status

0 commit comments

Comments
 (0)