suggested substitutions

jbrockmendel · jbrockmendel · commit fdc43d8420d2 · 2018-01-27T16:36:15.000-08:00
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -33,7 +33,7 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
                                PyDateTime_IMPORT)
 PyDateTime_IMPORT
 
-from tslib import NaT, Timestamp, Timedelta, array_to_datetime
+from tslib import NaT, array_to_datetime
 from missing cimport checknull
 
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -2225,3 +2225,27 @@ def _maybe_encode(values):
     if values is None:
         return []
     return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values]
+
+
+def sanitize_objects(ndarray[object] values, set na_values,
+                     convert_empty=True):
+    cdef:
+        Py_ssize_t i, n
+        object val, onan
+        Py_ssize_t na_count = 0
+        dict memo = {}
+
+    n = len(values)
+    onan = np.nan
+
+    for i from 0 <= i < n:
+        val = values[i]
+        if (convert_empty and val == '') or (val in na_values):
+            values[i] = onan
+            na_count += 1
+        elif val in memo:
+            values[i] = memo[val]
+        else:
+            memo[val] = val
+
+    return na_count
diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx
@@ -11,13 +11,11 @@ try:
 except ImportError:
     from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE
 
-
 import numpy as np
 cimport numpy as cnp
 from numpy cimport ndarray, uint8_t
 cnp.import_array()
 
-
 cimport util
 
 
@@ -27,30 +25,6 @@ ctypedef fused pandas_string:
     bytes
 
 
-def sanitize_objects(ndarray[object] values, set na_values,
-                     convert_empty=True):
-    cdef:
-        Py_ssize_t i, n
-        object val, onan
-        Py_ssize_t na_count = 0
-        dict memo = {}
-
-    n = len(values)
-    onan = np.nan
-
-    for i from 0 <= i < n:
-        val = values[i]
-        if (convert_empty and val == '') or (val in na_values):
-            values[i] = onan
-            na_count += 1
-        elif val in memo:
-            values[i] = memo[val]
-        else:
-            memo[val] = val
-
-    return na_count
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def write_csv_rows(list data, ndarray data_index,
@@ -187,49 +161,3 @@ def string_array_replace_from_nan_rep(
             arr[i] = replace
 
     return arr
-
-
-def convert_timestamps(ndarray values):
-    cdef:
-        object val, f, result
-        dict cache = {}
-        Py_ssize_t i, n = len(values)
-        ndarray[object] out
-
-    # for HDFStore, a bit temporary but...
-
-    from datetime import datetime
-    f = datetime.fromtimestamp
-
-    out = np.empty(n, dtype='O')
-
-    for i in range(n):
-        val = util.get_value_1d(values, i)
-        if val in cache:
-            out[i] = cache[val]
-        else:
-            cache[val] = out[i] = f(val)
-
-    return out
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def fast_unique(ndarray[object] values):
-    cdef:
-        Py_ssize_t i, n = len(values)
-        list uniques = []
-        dict table = {}
-        object val, stub = 0
-
-    for i from 0 <= i < n:
-        val = values[i]
-        if val not in table:
-            table[val] = stub
-            uniques.append(val)
-    try:
-        uniques.sort()
-    except Exception:
-        pass
-
-    return uniques
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -41,7 +41,6 @@
 
 from pandas.util._decorators import Appender
 
-from pandas._libs import writers as libwriters
 import pandas._libs.lib as lib
 import pandas._libs.parsers as parsers
 from pandas._libs.tslibs import parsing
@@ -1597,13 +1596,12 @@ def _infer_types(self, values, na_values, try_num_bool=True):
             except Exception:
                 result = values
                 if values.dtype == np.object_:
-                    na_count = libwriters.sanitize_objects(result, na_values,
-                                                           False)
+                    na_count = parsers.sanitize_objects(result, na_values,
+                                                        False)
         else:
             result = values
             if values.dtype == np.object_:
-                na_count = libwriters.sanitize_objects(values, na_values,
-                                                       False)
+                na_count = parsers.sanitize_objects(values, na_values, False)
 
         if result.dtype == np.object_ and try_num_bool:
             result = lib.maybe_convert_bool(values,
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -3843,7 +3843,7 @@ def read(self, where=None, columns=None, **kwargs):
                 # need a better algorithm
                 tuple_index = long_index.values
 
-                unique_tuples = libwriters.fast_unique(tuple_index)
+                unique_tuples = unique(tuple_index)
                 unique_tuples = com._asarray_tuplesafe(unique_tuples)
 
                 indexer = match(unique_tuples, tuple_index)
@@ -4622,7 +4622,7 @@ def _get_converter(kind, encoding):
     if kind == 'datetime64':
         return lambda x: np.asarray(x, dtype='M8[ns]')
     elif kind == 'datetime':
-        return libwriters.convert_timestamps
+        return lambda x: to_datetime(x, cache=True).to_pydatetime()
     elif kind == 'string':
         return lambda x: _unconvert_string_array(x, encoding=encoding)
     else:  # pragma: no cover