Skip to content

Commit fdc43d8

Browse files
committed
suggested substitutions
1 parent 78e16a9 commit fdc43d8

File tree

5 files changed

+30
-80
lines changed

5 files changed

+30
-80
lines changed

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
3333
PyDateTime_IMPORT)
3434
PyDateTime_IMPORT
3535

36-
from tslib import NaT, Timestamp, Timedelta, array_to_datetime
36+
from tslib import NaT, array_to_datetime
3737
from missing cimport checknull
3838

3939

pandas/_libs/parsers.pyx

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2225,3 +2225,27 @@ def _maybe_encode(values):
22252225
if values is None:
22262226
return []
22272227
return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values]
2228+
2229+
2230+
def sanitize_objects(ndarray[object] values, set na_values,
2231+
convert_empty=True):
2232+
cdef:
2233+
Py_ssize_t i, n
2234+
object val, onan
2235+
Py_ssize_t na_count = 0
2236+
dict memo = {}
2237+
2238+
n = len(values)
2239+
onan = np.nan
2240+
2241+
for i from 0 <= i < n:
2242+
val = values[i]
2243+
if (convert_empty and val == '') or (val in na_values):
2244+
values[i] = onan
2245+
na_count += 1
2246+
elif val in memo:
2247+
values[i] = memo[val]
2248+
else:
2249+
memo[val] = val
2250+
2251+
return na_count

pandas/_libs/writers.pyx

Lines changed: 0 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,11 @@ try:
1111
except ImportError:
1212
from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE
1313

14-
1514
import numpy as np
1615
cimport numpy as cnp
1716
from numpy cimport ndarray, uint8_t
1817
cnp.import_array()
1918

20-
2119
cimport util
2220

2321

@@ -27,30 +25,6 @@ ctypedef fused pandas_string:
2725
bytes
2826

2927

30-
def sanitize_objects(ndarray[object] values, set na_values,
31-
convert_empty=True):
32-
cdef:
33-
Py_ssize_t i, n
34-
object val, onan
35-
Py_ssize_t na_count = 0
36-
dict memo = {}
37-
38-
n = len(values)
39-
onan = np.nan
40-
41-
for i from 0 <= i < n:
42-
val = values[i]
43-
if (convert_empty and val == '') or (val in na_values):
44-
values[i] = onan
45-
na_count += 1
46-
elif val in memo:
47-
values[i] = memo[val]
48-
else:
49-
memo[val] = val
50-
51-
return na_count
52-
53-
5428
@cython.boundscheck(False)
5529
@cython.wraparound(False)
5630
def write_csv_rows(list data, ndarray data_index,
@@ -187,49 +161,3 @@ def string_array_replace_from_nan_rep(
187161
arr[i] = replace
188162

189163
return arr
190-
191-
192-
def convert_timestamps(ndarray values):
193-
cdef:
194-
object val, f, result
195-
dict cache = {}
196-
Py_ssize_t i, n = len(values)
197-
ndarray[object] out
198-
199-
# for HDFStore, a bit temporary but...
200-
201-
from datetime import datetime
202-
f = datetime.fromtimestamp
203-
204-
out = np.empty(n, dtype='O')
205-
206-
for i in range(n):
207-
val = util.get_value_1d(values, i)
208-
if val in cache:
209-
out[i] = cache[val]
210-
else:
211-
cache[val] = out[i] = f(val)
212-
213-
return out
214-
215-
216-
@cython.wraparound(False)
217-
@cython.boundscheck(False)
218-
def fast_unique(ndarray[object] values):
219-
cdef:
220-
Py_ssize_t i, n = len(values)
221-
list uniques = []
222-
dict table = {}
223-
object val, stub = 0
224-
225-
for i from 0 <= i < n:
226-
val = values[i]
227-
if val not in table:
228-
table[val] = stub
229-
uniques.append(val)
230-
try:
231-
uniques.sort()
232-
except Exception:
233-
pass
234-
235-
return uniques

pandas/io/parsers.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@
4141

4242
from pandas.util._decorators import Appender
4343

44-
from pandas._libs import writers as libwriters
4544
import pandas._libs.lib as lib
4645
import pandas._libs.parsers as parsers
4746
from pandas._libs.tslibs import parsing
@@ -1597,13 +1596,12 @@ def _infer_types(self, values, na_values, try_num_bool=True):
15971596
except Exception:
15981597
result = values
15991598
if values.dtype == np.object_:
1600-
na_count = libwriters.sanitize_objects(result, na_values,
1601-
False)
1599+
na_count = parsers.sanitize_objects(result, na_values,
1600+
False)
16021601
else:
16031602
result = values
16041603
if values.dtype == np.object_:
1605-
na_count = libwriters.sanitize_objects(values, na_values,
1606-
False)
1604+
na_count = parsers.sanitize_objects(values, na_values, False)
16071605

16081606
if result.dtype == np.object_ and try_num_bool:
16091607
result = lib.maybe_convert_bool(values,

pandas/io/pytables.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3843,7 +3843,7 @@ def read(self, where=None, columns=None, **kwargs):
38433843
# need a better algorithm
38443844
tuple_index = long_index.values
38453845

3846-
unique_tuples = libwriters.fast_unique(tuple_index)
3846+
unique_tuples = unique(tuple_index)
38473847
unique_tuples = com._asarray_tuplesafe(unique_tuples)
38483848

38493849
indexer = match(unique_tuples, tuple_index)
@@ -4622,7 +4622,7 @@ def _get_converter(kind, encoding):
46224622
if kind == 'datetime64':
46234623
return lambda x: np.asarray(x, dtype='M8[ns]')
46244624
elif kind == 'datetime':
4625-
return libwriters.convert_timestamps
4625+
return lambda x: to_datetime(x, cache=True).to_pydatetime()
46264626
elif kind == 'string':
46274627
return lambda x: _unconvert_string_array(x, encoding=encoding)
46284628
else: # pragma: no cover

0 commit comments

Comments
 (0)