From 45dfa46410ab1c5198637de91d4d5e9f46866e5c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 17 Mar 2019 19:48:53 +0300
Subject: [PATCH 01/42] PERF: rewrote _concat_date_cols function in C,
 removing extra conversion for integer/float zero and float NaN; rewrote
 _does_string_look_like_datetime in C

---
 pandas/_libs/tslibs/parsing.pyx               |  18 -
 .../_libs/tslibs/src/datetime/datehelpers.c   | 428 ++++++++++++++++++
 pandas/io/parsers.py                          |  18 +-
 setup.py                                      |  18 +
 4 files changed, 453 insertions(+), 29 deletions(-)
 create mode 100644 pandas/_libs/tslibs/src/datetime/datehelpers.c

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 1c8bfe4b4bc20..72de28c5ac54b 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -302,24 +302,6 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
     return parsed, parsed, reso
 
 
-cpdef bint _does_string_look_like_datetime(object date_string):
-    if date_string.startswith('0'):
-        # Strings starting with 0 are more consistent with a
-        # date-like string than a number
-        return True
-
-    try:
-        if float(date_string) < 1000:
-            return False
-    except ValueError:
-        pass
-
-    if date_string in _not_datelike_strings:
-        return False
-
-    return True
-
-
 cdef inline object _parse_dateabbr_string(object date_string, object default,
                                           object freq):
     cdef:
diff --git a/pandas/_libs/tslibs/src/datetime/datehelpers.c b/pandas/_libs/tslibs/src/datetime/datehelpers.c
new file mode 100644
index 0000000000000..2a4ced54d753f
--- /dev/null
+++ b/pandas/_libs/tslibs/src/datetime/datehelpers.c
@@ -0,0 +1,428 @@
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/arrayobject.h>
+
+#include <string.h>
+
+#include "../../../src/inline_helper.h"
+#include "../../../src/parser/tokenizer.h"
+
+#if PY_MAJOR_VERSION >= 3
+    #define PY_STRING_CHECK(string) (PyUnicode_Check(string))
+#else
+    #define PY_STRING_CHECK(string) \
+        (PyString_Check(string) || PyUnicode_Check(string))
+#endif
+
+// Writes `item` into result[index], stringifying it first unless it already
+// is a string. When keep_trivial_numbers is set, an integer zero and a float
+// zero or NaN are stored as they are. Returns 1 on success, 0 on failure.
+int PANDAS_INLINE convert_and_set_item(PyObject *item, Py_ssize_t index,
+                                       PyArrayObject *result,
+                                       int keep_trivial_numbers) {
+    int owns_item = 0, stringify = 1, status;
+    if (item == NULL) {
+        return 0;
+    }
+    if (keep_trivial_numbers) {
+#if PY_MAJOR_VERSION >= 3
+        if (PyLong_Check(item)) {
+            // look at the size field and first digit of the int object
+            PyLongObject* as_long = (PyLongObject*)item;
+            Py_ssize_t ndigits = Py_SIZE(as_long);
+            if (ndigits == 0) {
+                stringify = 0;
+            } else if ((ndigits == 1 || ndigits == -1) &&
+                       as_long->ob_digit[0] == 0) {
+                stringify = 0;
+            }
+#else
+        if (PyInt_CheckExact(item)) {
+            if (((PyIntObject*)item)->ob_ival == 0) stringify = 0;
+#endif
+        } else if (PyFloat_Check(item)) {
+            double value = PyFloat_AS_DOUBLE(item);
+            // value != value is true only for NaN
+            if (value == 0.0 || value != value) {
+                stringify = 0;
+            }
+        }
+    }
+
+    if (stringify && !PY_STRING_CHECK(item)) {
+        item = PyObject_Str(item);
+        if (item == NULL) {
+            return 0;
+        }
+        // PyObject_Str returned a new reference that must be released here
+        owns_item = 1;
+    }
+
+    status = PyArray_SETITEM(result, PyArray_GETPTR1(result, index), item);
+    if (status != 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Cannot set resulting item");
+    }
+    if (owns_item) {
+        Py_DECREF(item);
+    }
+    return (status != 0) ? 0 : 1;
+}
+
+// Stores str(item) at list[idx]. Returns 1 and consumes the caller's
+// reference to item on success; returns 0 and leaves it alone on failure.
+static int put_object_as_string(PyObject* list, Py_ssize_t idx,
+                                PyObject* item) {
+    PyObject* str_item = PY_STRING_CHECK(item) ? item : PyObject_Str(item);
+    if (str_item == NULL) return 0;
+    if (str_item == item) Py_INCREF(str_item);
+    // PyList_SetItem consumes str_item whether or not it succeeds
+    if (PyList_SetItem(list, idx, str_item) != 0) return 0;
+    Py_DECREF(item);
+    return 1;
+}
+
+// Drops `size` references held in `arrays`, frees the buffer, returns NULL.
+static PyObject* free_arrays(PyObject** arrays, Py_ssize_t size) {
+    Py_ssize_t pos;
+    for (pos = 0; pos < size; ++pos) Py_DECREF(arrays[pos]);
+    free(arrays);
+    return NULL;
+}
+
+// Python signature: concat_date_cols(date_cols, keep_trivial_numbers=False)
+// Builds a new 1-D object ndarray from a sequence of "date columns" (1-D
+// ndarrays and/or arbitrary sequences):
+//   * no columns      -> empty array;
+//   * one column      -> each element converted via convert_and_set_item(),
+//                        which is the only place keep_trivial_numbers is used;
+//   * several columns -> for each row, str() of every element joined by " ";
+//                        the result is as long as the shortest column.
+// Returns NULL with a Python exception set on failure.
+static PyObject* concat_date_cols(PyObject *self, PyObject *args,
+                                  PyObject *kwds) {
+    PyObject *sequence = NULL;
+    PyObject *py_keep_trivial_numbers = NULL;
+    PyArrayObject *result = NULL;
+    Py_ssize_t sequence_size = 0;
+    int keep_trivial_numbers = 0;
+    char* kwlist[] = {"", "keep_trivial_numbers", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O", kwlist,
+                                     &sequence, &py_keep_trivial_numbers)) {
+        return NULL;
+    }
+    if (!PySequence_Check(sequence)) {
+        PyErr_SetString(PyExc_TypeError, "argument must be sequence");
+        return NULL;
+    }
+    if (py_keep_trivial_numbers != NULL) {
+        keep_trivial_numbers = PyObject_IsTrue(py_keep_trivial_numbers);
+        if (keep_trivial_numbers < 0) {
+            return NULL;  // truth-testing the argument raised
+        }
+    }
+
+    sequence_size = PySequence_Size(sequence);
+    if (sequence_size == -1) {
+        return NULL;
+    } else if (sequence_size == 0) {
+        npy_intp dims[1];
+        dims[0] = 0;
+        result = (PyArrayObject*)PyArray_ZEROS(1, dims, NPY_OBJECT, 0);
+        return (PyObject*)result;
+    } else if (sequence_size == 1) {
+        PyObject* array = PySequence_GetItem(sequence, 0);
+        Py_ssize_t array_size;
+        if (array == NULL) {
+            return NULL;
+        }
+
+        array_size = PySequence_Size(array);
+        if (array_size == -1) {
+            Py_DECREF(array);
+            return NULL;
+        }
+
+        {
+            npy_intp dims[1];
+            dims[0] = array_size;
+            result = (PyArrayObject*)PyArray_ZEROS(1, dims, NPY_OBJECT, 0);
+            if (result == NULL) {
+                Py_DECREF(array);
+                return NULL;
+            }
+        }
+
+        // Only 1-D ndarrays are read through PyArray_GETPTR1; everything
+        // else (N-D ndarrays included) is walked as a generic sequence.
+        if (PyArray_CheckExact(array) &&
+                PyArray_NDIM((PyArrayObject*)array) == 1) {
+            PyArrayObject *ndarray = (PyArrayObject*)array;
+            Py_ssize_t i;
+            for (i = 0; i < array_size; ++i) {
+                PyObject *item = PyArray_GETITEM(ndarray,
+                                                 PyArray_GETPTR1(ndarray, i));
+                if (!convert_and_set_item(item, i, result,
+                                          keep_trivial_numbers)) {
+                    Py_DECREF(result);
+                    Py_DECREF(array);
+                    // item is NULL when PyArray_GETITEM itself failed
+                    Py_XDECREF(item);
+                    return NULL;
+                }
+                Py_DECREF(item);
+            }
+        } else {
+            PyObject* fast_array = PySequence_Fast(array,
+                    "elements of input sequence must be sequence");
+            Py_ssize_t i;
+            if (fast_array == NULL) {
+                Py_DECREF(result);
+                Py_DECREF(array);
+                // PySequence_Fast already raised with the message above
+                return NULL;
+            }
+
+            for (i = 0; i < array_size; ++i) {
+                PyObject* item = PySequence_Fast_GET_ITEM(fast_array, i);
+                if (!convert_and_set_item(item, i, result,
+                                          keep_trivial_numbers)) {
+                    Py_DECREF(result);
+                    Py_DECREF(array);
+                    Py_DECREF(fast_array);
+                    return NULL;
+                }
+            }
+            Py_DECREF(fast_array);
+        }
+        Py_DECREF(array);
+        return (PyObject*)result;
+    } else {
+        // Several columns: collect one owned reference per column first.
+        PyObject **arrays = NULL;
+        PyObject *array = NULL;
+        PyObject *fast_array = NULL;
+        PyObject *separator = NULL;
+        PyObject *item = NULL;
+        PyObject *list_to_join = NULL;
+        PyObject *result_string = NULL;
+        Py_ssize_t min_array_size = -1;  // -1 means "no column measured yet"
+        Py_ssize_t array_size = 0;
+        Py_ssize_t held = 0;  // how many entries of `arrays` own a reference
+        int all_numpy = 1;
+        Py_ssize_t i, j;
+
+        arrays = (PyObject**) malloc(sizeof(PyObject*) * sequence_size);
+        if (arrays == NULL) {
+            return PyErr_NoMemory();
+        }
+        for (i = 0; i < sequence_size; ++i) {
+            array = PySequence_GetItem(sequence, i);
+            if (array == NULL) {
+                goto fail;
+            }
+            if (!PyArray_CheckExact(array)) {
+                all_numpy = 0;
+            } else if (PyArray_NDIM((PyArrayObject*)array) != 1) {
+                PyErr_SetString(PyExc_ValueError,
+                                "ndarrays must be 1-dimentional");
+                Py_DECREF(array);  // not stored in `arrays` yet
+                goto fail;
+            }
+            arrays[i] = array;
+            held = i + 1;
+        }
+
+        // Find the shortest column. Unless every column is a 1-D ndarray,
+        // each one is replaced by its PySequence_Fast form so that rows can
+        // be read with PySequence_Fast_GET_ITEM below.
+        for (i = 0; i < sequence_size; ++i) {
+            if (all_numpy) {
+                array_size = PyArray_SIZE((PyArrayObject*)arrays[i]);
+            } else {
+                fast_array = PySequence_Fast(arrays[i],
+                        "elements of input sequence must be sequence");
+                if (fast_array == NULL) {
+                    goto fail;
+                }
+                // release the column being replaced, not the last one fetched
+                Py_DECREF(arrays[i]);
+                arrays[i] = fast_array;
+                array_size = PySequence_Fast_GET_SIZE(fast_array);
+            }
+            // a zero-length column must win too, so 0 cannot be the sentinel
+            if (min_array_size < 0 || array_size < min_array_size) {
+                min_array_size = array_size;
+            }
+        }
+
+        {
+            npy_intp dims[1];
+            dims[0] = min_array_size;
+            result = (PyArrayObject*)PyArray_ZEROS(1, dims, NPY_OBJECT, 0);
+            if (result == NULL) {
+                goto fail;
+            }
+        }
+
+        separator = PyUnicode_FromFormat(" ");
+        if (separator == NULL) {
+            goto fail;
+        }
+        list_to_join = PyList_New(sequence_size);
+        if (list_to_join == NULL) {
+            goto fail;
+        }
+
+        for (i = 0; i < min_array_size; ++i) {
+            // list_to_join is reused for every row; put_object_as_string
+            // overwrites slot j with the string form of column j's element
+            for (j = 0; j < sequence_size; ++j) {
+                if (all_numpy) {
+                    PyArrayObject* arr = (PyArrayObject*)arrays[j];
+                    item = PyArray_GETITEM(arr, PyArray_GETPTR1(arr, i));
+                } else {
+                    // borrowed reference: take our own before handing it on
+                    item = PySequence_Fast_GET_ITEM(arrays[j], i);
+                    Py_XINCREF(item);
+                }
+                if (item == NULL) {
+                    goto fail;
+                }
+                if (!put_object_as_string(list_to_join, j, item)) {
+                    Py_DECREF(item);
+                    goto fail;
+                }
+            }
+            result_string = PyUnicode_Join(separator, list_to_join);
+            if (result_string == NULL) {
+                goto fail;
+            }
+            if (PyArray_SETITEM(result, PyArray_GETPTR1(result, i),
+                                result_string) != 0) {
+                PyErr_SetString(PyExc_RuntimeError,
+                                "Cannot set resulting item");
+                Py_DECREF(result_string);
+                goto fail;
+            }
+            Py_DECREF(result_string);
+        }
+        Py_DECREF(list_to_join);
+        Py_DECREF(separator);
+        (void)free_arrays(arrays, sequence_size);
+        return (PyObject*)result;
+
+    fail:
+        // single exit for every error after `arrays` was allocated
+        Py_XDECREF(list_to_join);
+        Py_XDECREF(separator);
+        Py_XDECREF(result);
+        return free_arrays(arrays, held);
+    }
+}
+
+static char not_datelike[sizeof(char) * 256];
+
+static PyObject* _does_string_look_like_datetime(PyObject* unused,
+                                                 PyObject* arg) {
+    char *buf = NULL, *endptr = NULL;
+    Py_ssize_t length = -1;
+    double converted_date;
+    int error = 0;
+    int result = 1;
+
+#if PY_MAJOR_VERSION == 2
+    if (!PyString_CheckExact(arg)) {
+        if (!PyUnicode_CheckExact(arg)) {
+            // arg is not a string, so it's certainly
+            // not a datetime-looking string
+            PyErr_SetString(PyExc_ValueError,
+                            "_does_string_look_like_datetime expects a string");
+            return NULL;
+        }
+        buf = PyUnicode_AS_DATA(arg);
+        length = (int)PyUnicode_GET_SIZE(arg);
+    } else {
+        if (PyString_AsStringAndSize(arg, &buf, &length) == -1) {
+            return NULL;
+        }
+    }
+#else
+    if (!PyUnicode_CheckExact(arg) || !PyUnicode_IS_READY(arg)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "_does_string_look_like_datetime expects a string");
+        return NULL;
+    }
+    buf = PyUnicode_DATA(arg);
+    length = PyUnicode_GET_LENGTH(arg);
+#endif
+
+    if (length >= 1) {
+        char first = *buf;
+        if (first == '0') {
+            result = 1;
+        } else if (length == 1 && not_datelike[Py_CHARMASK(first)]) {
+            result = 0;
+        } else {
+            converted_date = xstrtod(buf, &endptr, '.', 'e', '\0', 1);
+            if ((errno == 0) && (endptr == buf + length)) {
+                result = (converted_date >= 1000) ? 1 : 0;
+            }
+        }
+    }
+
+    if (result) {
+        Py_RETURN_TRUE;
+    } else {
+        Py_RETURN_FALSE;
+    }
+}
+
+static PyMethodDef module_methods[] = {
+    /* name from python, name in C-file, ..., __doc__ string of method */
+    {
+        "concat_date_cols", (PyCFunction)concat_date_cols,
+        METH_VARARGS | METH_KEYWORDS,
+        "concatenates date cols and returns numpy array"
+    },
+    {
+        "_does_string_look_like_datetime", _does_string_look_like_datetime,
+        METH_O,
+        "checks if string looks like a datetime"
+    },
+    {NULL, NULL, 0, NULL}
+};
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "datehelpers",                                   // name of module
+    "helpers for datetime structures manipulation",  // module documentation
+    -1,             // size of per-interpreter state of the module,
+                    // or -1 if the module keeps state in global variables.
+    module_methods
+};
+#define PY_DATEHELPERS_MODULE_INIT PyMODINIT_FUNC PyInit_datehelpers(void)
+#define PY_MODULE_CREATE PyModule_Create(&moduledef)
+#define PY_RETURN_MODULE return module
+#else
+#define PY_DATEHELPERS_MODULE_INIT void initdatehelpers(void)
+#define PY_MODULE_CREATE Py_InitModule("datehelpers", module_methods)
+#define PY_RETURN_MODULE
+#endif
+
+PY_DATEHELPERS_MODULE_INIT {
+    PyObject *module = NULL;
+    import_array();
+
+    module = PY_MODULE_CREATE;
+
+    memset(not_datelike, 0, sizeof(not_datelike));
+    not_datelike['a'] = not_datelike['A'] = 1;
+    not_datelike['m'] = not_datelike['M'] = 1;
+    not_datelike['p'] = not_datelike['P'] = 1;
+    not_datelike['t'] = not_datelike['T'] = 1;
+
+    PY_RETURN_MODULE;
+}
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index a3fde2c2bf4dd..d884007725d6e 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 
+from pandas._libs.datehelpers import concat_date_cols as _concat_date_cols
 import pandas._libs.lib as lib
 import pandas._libs.ops as libops
 import pandas._libs.parsers as parsers
@@ -3186,7 +3187,7 @@ def _make_date_converter(date_parser=None, dayfirst=False,
                          infer_datetime_format=False, cache_dates=True):
     def converter(*date_cols):
         if date_parser is None:
-            strs = _concat_date_cols(date_cols)
+            strs = _concat_date_cols(date_cols, keep_trivial_numbers=True)
 
             try:
                 return tools.to_datetime(
@@ -3216,7 +3217,11 @@ def converter(*date_cols):
             except Exception:
                 try:
                     return tools.to_datetime(
-                        parsing.try_parse_dates(_concat_date_cols(date_cols),
+                        parsing.try_parse_dates(
+                            _concat_date_cols(
+                                date_cols,
+                                keep_trivial_numbers=True
+                            ),
                                                 parser=date_parser,
                                                 dayfirst=dayfirst),
                         cache=cache_dates,
@@ -3511,15 +3516,6 @@ def _get_col_names(colspec, columns):
     return colnames
 
 
-def _concat_date_cols(date_cols):
-    if len(date_cols) == 1:
-        return np.array([str(x) for x in date_cols[0]], dtype=object)
-
-    rs = np.array([' '.join(str(y) for y in x)
-                   for x in zip(*date_cols)], dtype=object)
-    return rs
-
-
 class FixedWidthReader(BaseIterator):
     """
     A reader of fixed-width lines.
diff --git a/setup.py b/setup.py
index d121a54ded2a1..18b282d17a28b 100755
--- a/setup.py
+++ b/setup.py
@@ -243,6 +243,7 @@ def initialize_options(self):
         ujson_lib = pjoin(base, 'ujson', 'lib')
         self._clean_exclude = [pjoin(dt, 'np_datetime.c'),
                                pjoin(dt, 'np_datetime_strings.c'),
+                               pjoin(dt, 'datehelpers.c'),
                                pjoin(parser, 'tokenizer.c'),
                                pjoin(parser, 'io.c'),
                                pjoin(ujson_python, 'ujson.c'),
@@ -762,6 +763,23 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
                       extra_link_args=extra_link_args)
 extensions.append(_move_ext)
 
+# ----------------------------------------------------------------------
+# datehelpers
+datehelpers_sources = [
+    'pandas/_libs/tslibs/src/datetime/datehelpers.c',
+    'pandas/_libs/src/parser/tokenizer.c'
+]
+datehelpers_ext = Extension('pandas._libs.datehelpers',
+                            depends=[
+                                'pandas/_libs/src/parser/tokenizer.h'
+                            ],
+                            sources=datehelpers_sources,
+                            include_dirs=['pandas/_libs/src/klib/'],
+                            extra_compile_args=extra_compile_args,
+                            define_macros=macros)
+extensions.append(datehelpers_ext)
+
+
 # The build cache system does string matching below this point.
 # if you change something, be careful.
 

From 1531ec9caa9a7fc2f51f643b4585f1646b7bfabb Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 18 Mar 2019 14:40:40 +0300
Subject: [PATCH 02/42] perf bench for _concat_date_cols

---
 asv_bench/benchmarks/io/csv.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index c51fb09ad8671..36ba7c569d34f 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -96,6 +96,35 @@ def time_read_csv(self, infer_datetime_format, format):
                  infer_datetime_format=infer_datetime_format)
 
 
+class ReadCSVConcatDatetime(StringIORewind):
+    """Times read_csv parsing a single column of 50000 datetime strings."""
+    iso8601 = '%Y-%m-%d %H:%M:%S'
+
+    def setup(self):
+        rng = date_range('1/1/2000', periods=50000, freq='S')
+        self.StringIO_input = StringIO('\n'.join(
+                                       rng.strftime(self.iso8601).tolist()))
+
+    def time_read_csv(self):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
+class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
+    """Times read_csv when every value of the parsed date column is bad."""
+    params = (['nan', '0', ''],)
+    param_names = ['bad_date_value']
+
+    def setup(self, bad_date_value):
+        self.StringIO_input = StringIO(('%s,\n' % bad_date_value) * 50000)
+
+    def time_read_csv(self, bad_date_value):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo', 'bar'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
 class ReadCSVSkipRows(BaseIO):
 
     fname = '__test__.csv'

From 0756da952b04e74998f3aa4bf2cc4b47c357cff5 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Mon, 18 Mar 2019 09:55:03 -0500
Subject: [PATCH 03/42] Add benchmark for _does_string_look_like_datetime

---
 asv_bench/benchmarks/io/parsers.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 asv_bench/benchmarks/io/parsers.py

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
new file mode 100644
index 0000000000000..2575521002528
--- /dev/null
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -0,0 +1,20 @@
+from pandas._libs.tslibs.parsing import _does_string_look_like_datetime
+
+
+class DoesStringLookLikeDatetime(object):
+    """Times _does_string_look_like_datetime on 1000000 copies of a value."""
+    params = (['2Q2005', '0.0', '10000'],)
+    param_names = ['value']
+
+    def setup(self, value):
+        self.objects = [value] * 1000000
+
+    def time_check_datetimes(self, value):
+        for obj in self.objects:
+            try:
+                _does_string_look_like_datetime(obj)
+            except ValueError:
+                pass
+
+
+from ..pandas_vb_common import setup  # noqa: F401

From 36b8bdb60d60dbccc95ad80f12dcaf4607870b0c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 19 Mar 2019 16:30:23 +0300
Subject: [PATCH 04/42] implemented _does_string_look_like_datetime in cython

---
 pandas/_libs/tslibs/parsing.pyx               | 42 ++++++++++++
 .../_libs/tslibs/src/datetime/datehelpers.c   | 68 -------------------
 setup.py                                      |  4 +-
 3 files changed, 45 insertions(+), 69 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 72de28c5ac54b..a2871ca353a23 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -6,6 +6,7 @@ import time
 from io import StringIO
 
 from libc.string cimport strchr
+from cpython cimport PyUnicode_Check, PyBytes_Check, PyBytes_AsStringAndSize
 
 from cpython.datetime cimport datetime, datetime_new, import_datetime
 from cpython.version cimport PY_VERSION_HEX
@@ -31,6 +32,21 @@ from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
 cdef extern from "../src/headers/portable.h":
     int getdigit_ascii(char c, int default) nogil
 
+cdef extern from "../src/parser/tokenizer.h":
+    double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
+                   int skip_trailing)
+
+cdef extern from *:
+    char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t* length)
+
+cdef inline bint get_string_data(object s, char **buf, Py_ssize_t *length):
+    if PyUnicode_Check(s):  # unicode: point buf at its UTF-8 form
+        buf[0] = PyUnicode_AsUTF8AndSize(s, length)
+        return buf[0] != NULL
+    if PyBytes_Check(s):  # bytes: buf and length are filled by the C API
+        return PyBytes_AsStringAndSize(s, buf, length) == 0
+    return False  # neither unicode nor bytes: buf and length untouched
+
 # ----------------------------------------------------------------------
 # Constants
 
@@ -302,6 +318,30 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
     return parsed, parsed, reso
 
 
+cpdef bint _does_string_look_like_datetime(object date_string):
+    cdef:
+        char *buf = NULL
+        char *endptr = NULL
+        Py_ssize_t length = -1
+        double converted_date
+        char first
+    # No character buffer available (e.g. not str/bytes): not date-like.
+    if not get_string_data(date_string, &buf, &length):
+        return False
+    if length >= 1:
+        first = buf[0]
+        if first == '0':
+            return True  # leading '0': treated as date-like, not a number
+        elif length == 1 and date_string in _not_datelike_strings:
+            return False
+        else:
+            converted_date = xstrtod(buf, &endptr, '.', 'e', '\0', 1)
+            if errno == 0 and endptr == buf + length:  # parsed to the end
+                return converted_date >= 1000
+
+    return True  # empty input, or text that is not a plain number
+
+
 cdef inline object _parse_dateabbr_string(object date_string, object default,
                                           object freq):
     cdef:
diff --git a/pandas/_libs/tslibs/src/datetime/datehelpers.c b/pandas/_libs/tslibs/src/datetime/datehelpers.c
index 2a4ced54d753f..551631475b4f6 100644
--- a/pandas/_libs/tslibs/src/datetime/datehelpers.c
+++ b/pandas/_libs/tslibs/src/datetime/datehelpers.c
@@ -322,63 +322,6 @@ static PyObject* concat_date_cols(PyObject *self, PyObject *args,
     }
 }
 
-static char not_datelike[sizeof(char) * 256];
-
-static PyObject* _does_string_look_like_datetime(PyObject* unused,
-                                                 PyObject* arg) {
-    char *buf = NULL, *endptr = NULL;
-    Py_ssize_t length = -1;
-    double converted_date;
-    int error = 0;
-    int result = 1;
-
-#if PY_MAJOR_VERSION == 2
-    if (!PyString_CheckExact(arg)) {
-        if (!PyUnicode_CheckExact(arg)) {
-            // arg is not a string, so it's certainly
-            // not a datetime-looking string
-            PyErr_SetString(PyExc_ValueError,
-                            "_does_string_look_like_datetime expects a string");
-            return NULL;
-        }
-        buf = PyUnicode_AS_DATA(arg);
-        length = (int)PyUnicode_GET_SIZE(arg);
-    } else {
-        if (PyString_AsStringAndSize(arg, &buf, &length) == -1) {
-            return NULL;
-        }
-    }
-#else
-    if (!PyUnicode_CheckExact(arg) || !PyUnicode_IS_READY(arg)) {
-        PyErr_SetString(PyExc_ValueError,
-                        "_does_string_look_like_datetime expects a string");
-        return NULL;
-    }
-    buf = PyUnicode_DATA(arg);
-    length = PyUnicode_GET_LENGTH(arg);
-#endif
-
-    if (length >= 1) {
-        char first = *buf;
-        if (first == '0') {
-            result = 1;
-        } else if (length == 1 && not_datelike[Py_CHARMASK(first)]) {
-            result = 0;
-        } else {
-            converted_date = xstrtod(buf, &endptr, '.', 'e', '\0', 1);
-            if ((errno == 0) && (endptr == buf + length)) {
-                result = (converted_date >= 1000) ? 1 : 0;
-            }
-        }
-    }
-
-    if (result) {
-        Py_RETURN_TRUE;
-    } else {
-        Py_RETURN_FALSE;
-    }
-}
-
 static PyMethodDef module_methods[] = {
     /* name from python, name in C-file, ..., __doc__ string of method */
     {
@@ -386,11 +329,6 @@ static PyMethodDef module_methods[] = {
         METH_VARARGS | METH_KEYWORDS,
         "concatenates date cols and returns numpy array"
     },
-    {
-        "_does_string_look_like_datetime", _does_string_look_like_datetime,
-        METH_O,
-        "checks if string looks like a datetime"
-    },
     {NULL, NULL, 0, NULL}
 };
 
@@ -418,11 +356,5 @@ PY_DATEHELPERS_MODULE_INIT {
 
     module = PY_MODULE_CREATE;
 
-    memset(not_datelike, 0, sizeof(not_datelike));
-    not_datelike['a'] = not_datelike['A'] = 1;
-    not_datelike['m'] = not_datelike['M'] = 1;
-    not_datelike['p'] = not_datelike['P'] = 1;
-    not_datelike['t'] = not_datelike['T'] = 1;
-
     PY_RETURN_MODULE;
 }
diff --git a/setup.py b/setup.py
index 18b282d17a28b..705fa0b24ddd4 100755
--- a/setup.py
+++ b/setup.py
@@ -635,7 +635,9 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
         'sources': np_datetime_sources},
     '_libs.tslibs.parsing': {
         'pyxfile': '_libs/tslibs/parsing',
-        'include': []},
+        'include': common_include,
+        'depends': ['pandas/_libs/src/parser/tokenizer.h'],
+        'sources': ['pandas/_libs/src/parser/tokenizer.c']},
     '_libs.tslibs.period': {
         'pyxfile': '_libs/tslibs/period',
         'include': ts_include,

From a9afbdbac2cee171f8facb362509b1d519655ad7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 19 Mar 2019 18:25:46 +0300
Subject: [PATCH 05/42] new benchmark for _concat_date_cols func

---
 asv_bench/benchmarks/io/parsers.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index 2575521002528..1e0010a000c34 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -1,4 +1,6 @@
 from pandas._libs.tslibs.parsing import _does_string_look_like_datetime
+from pandas.io.parsers import _concat_date_cols
+import numpy as np
 
 
 class DoesStringLookLikeDatetime(object):
@@ -17,4 +19,18 @@ def time_check_datetimes(self, value):
                 pass
 
 
-from ..pandas_vb_common import setup  # noqa: F401
+class ConcatDateCols(object):
+    """Times _concat_date_cols on one or two columns of a repeated value."""
+    params = ([1234567890, 'AAAA'], [1, 2])
+    param_names = ['value', 'dim']  # dim: number of columns built in setup
+
+    def setup(self, value, dim):
+        count_elem = 1000000
+        if dim == 1:
+            self.object = (np.array([value] * count_elem),)
+        if dim == 2:
+            self.object = (np.array([value] * count_elem),
+                           np.array([value] * count_elem))
+
+    def time_check_concat(self, value, dim):
+        _concat_date_cols(self.object)

From ee1f32baba89b83c2af897dd01107613e887a0ec Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 20 Mar 2019 21:21:46 +0300
Subject: [PATCH 06/42] init cython version of _concat_date_cols

---
 pandas/_libs/lib.pyx | 145 ++++++++++++++++++++++++++++++++++++++++---
 pandas/io/parsers.py |   3 +-
 2 files changed, 139 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 7f66b93b58a1a..7f54931ce4203 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -8,10 +8,11 @@ import warnings
 import cython
 from cython import Py_ssize_t
 
-from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
-                      PyTuple_New,
+from cpython cimport (PyErr_SetString, Py_INCREF, PyTuple_SET_ITEM,
+                      PyTuple_New, PyObject_Str, PyList_SetItem,
                       Py_EQ,
-                      PyObject_RichCompareBool)
+                      PyObject_RichCompareBool,
+                      PyUnicode_Join, PyList_New)
 
 from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
                                PyTime_Check, PyDelta_Check,
@@ -20,12 +21,12 @@ PyDateTime_IMPORT
 
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_GETITEM,
+from numpy cimport (ndarray, PyArray_GETITEM, PyArray_CheckExact,
                     PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
-                    flatiter, NPY_OBJECT,
-                    int64_t,
-                    float32_t, float64_t,
-                    uint8_t, uint64_t,
+                    flatiter, NPY_OBJECT, PyArray_SETITEM,
+                    int64_t, PyArray_GETPTR1,
+                    float32_t, float64_t, npy_intp, PyArray_NDIM,
+                    uint8_t, uint64_t, PyArray_ZEROS,
                     complex128_t)
 cnp.import_array()
 
@@ -46,6 +47,9 @@ cdef extern from "numpy/arrayobject.h":
             object fields
             tuple names
 
+cdef extern from "Python.h":
+    object PyUnicode_FromFormat(const char *format, ...)
+
 
 cdef extern from "src/parse_helper.h":
     int floatify(object, float64_t *result, int *maybe_int) except -1
@@ -2314,3 +2318,128 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
             output[i] = default
 
     return maybe_convert_objects(output)
+
+
+cdef inline int convert_and_set_item(object item, Py_ssize_t index,
+                                     object result,
+                                     int keep_trivial_numbers):
+    cdef:
+        int do_convert = 1
+        object str_item
+        int int_item
+        double double_item
+
+    if keep_trivial_numbers:
+        if isinstance(item, int):
+            int_item = item
+            if int_item == 0:
+                do_convert = 0
+        elif isinstance(item, float):
+            double_item = item
+            if double_item == 0.0:
+                do_convert = 0
+
+    if do_convert:
+        if not isinstance(item, (str, bytes)):
+            str_item = PyObject_Str(item)
+            item = str_item
+
+    if PyArray_SETITEM(result, PyArray_GETPTR1(result, index), item):
+        PyErr_SetString(RuntimeError, "Cannot set resulting item")
+        return 0
+
+    return 1
+
+
+cpdef int put_object_as_unicode(object list, Py_ssize_t idx, object item):
+    if not isinstance(item, str):
+        item = PyObject_Str(item)
+    Py_INCREF(item)
+    return 1 if PyList_SetItem(list, idx, item) == 0 else 0
+
+cpdef object _concat_date_cols(object date_cols,
+                               object keep_trivial_numbers=False):
+    cdef:
+        object sequence
+        int keep_numbers, all_numpy = 1
+        Py_ssize_t sequence_size
+        Py_ssize_t array_size, min_array_size = 0
+        Py_ssize_t i, j
+        object result, arrays
+        object array, fast_array, item
+        npy_intp dims[1]
+        object separator
+        object list_to_join, result_string
+
+    sequence = date_cols
+    keep_numbers = keep_trivial_numbers
+    sequence_size = len(date_cols)
+
+    if sequence_size == -1:
+        return None
+    elif sequence_size == 0:
+        return np.zeros(0, dtype=object)
+    elif sequence_size == 1:
+        array = sequence[0]
+        array_size = len(array)
+        dims[0] = array_size
+        result = PyArray_ZEROS(1, dims, NPY_OBJECT, 0)
+        if PyArray_CheckExact(array):
+            for i in range(array_size):
+                item = PyArray_GETITEM(array,
+                                       PyArray_GETPTR1(array, i))
+                if not convert_and_set_item(item, i, result, keep_numbers):
+                    raise RuntimeError
+        else:
+            if not isinstance(array, (tuple, list)):
+                fast_array = tuple(array)
+            else:
+                fast_array = array
+            for i in range(array_size):
+                item = fast_array[i]
+                if not convert_and_set_item(item, i, result, keep_numbers):
+                    raise RuntimeError
+
+        return result
+    else:
+        arrays = list(sequence)
+        for i in range(sequence_size):
+            array = arrays[i]
+            if PyArray_CheckExact(array):
+                if PyArray_NDIM(array) != 1:
+                    raise RuntimeError("ndarrays must be 1-dimentional")
+            elif not isinstance(array, (tuple, list)):
+                all_numpy = 0
+                fast_array = tuple(array)
+                array = fast_array
+            else:
+                all_numpy = 0
+            if len(array) < min_array_size or min_array_size == 0:
+                min_array_size = len(array)
+        dims[0] = min_array_size
+        result = PyArray_ZEROS(1, dims, NPY_OBJECT, 0)
+
+        separator = PyUnicode_FromFormat(" ")
+        list_to_join = PyList_New(sequence_size)
+
+        for i in range(min_array_size):
+            if all_numpy:
+                for j in range(sequence_size):
+                    array = arrays[j]
+                    item = PyArray_GETITEM(array, PyArray_GETPTR1(array, i))
+                    if not put_object_as_unicode(list_to_join, j, item):
+                        raise RuntimeError
+            else:
+                for j in range(sequence_size):
+                    array = arrays[j]
+                    item = array[i]
+                    if not put_object_as_unicode(list_to_join, j, item):
+                        raise RuntimeError
+
+            result_string = PyUnicode_Join(separator, list_to_join)
+
+            if (PyArray_SETITEM(result, PyArray_GETPTR1(result, i),
+                                result_string) != 0):
+                raise RuntimeError("Cannot set resulting item")
+
+        return result
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index d884007725d6e..4e55a566723be 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -13,7 +13,8 @@
 
 import numpy as np
 
-from pandas._libs.datehelpers import concat_date_cols as _concat_date_cols
+#from pandas._libs.datehelpers import concat_date_cols as _concat_date_cols
+from pandas._libs.lib import _concat_date_cols
 import pandas._libs.lib as lib
 import pandas._libs.ops as libops
 import pandas._libs.parsers as parsers

From 84e1b007fae8aed241260389f7740db7e8662c29 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 20 Mar 2019 21:24:32 +0300
Subject: [PATCH 07/42] fix C version of _concat_date_cols

---
 pandas/_libs/tslibs/src/datetime/datehelpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/tslibs/src/datetime/datehelpers.c b/pandas/_libs/tslibs/src/datetime/datehelpers.c
index 551631475b4f6..174b88fe02468 100644
--- a/pandas/_libs/tslibs/src/datetime/datehelpers.c
+++ b/pandas/_libs/tslibs/src/datetime/datehelpers.c
@@ -237,7 +237,7 @@ static PyObject* concat_date_cols(PyObject *self, PyObject *args,
                     Py_XDECREF(fast_array);
                     return free_arrays(arrays, sequence_size);
                 }
-                Py_DECREF(array);
+                Py_DECREF(*parray);
                 arrays[i] = fast_array;
 
                 if (array_size < min_array_size || min_array_size == 0) {

From 2cf9f22d2bd139fec452117115aee4ef14a18bc8 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 20 Mar 2019 21:27:37 +0300
Subject: [PATCH 08/42] added ConcatDateColsList benchmark

---
 asv_bench/benchmarks/io/parsers.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index 1e0010a000c34..a85dd79b6f3e0 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -34,3 +34,19 @@ def setup(self, value, dim):
 
     def time_check_concat(self, value, dim):
         _concat_date_cols(self.object)
+
+class ConcatDateColsList(object):
+
+    params = ([1234567890, 'AAAA'], [1, 2])
+    param_names = ['value', 'dim']
+
+    def setup(self, value, dim):
+        count_elem = 1000000
+        if dim == 1:
+            self.object = ([value] * count_elem,)
+        if dim == 2:
+            self.object = ([value] * count_elem,
+                           [value] * count_elem)
+
+    def time_check_concat(self, value, dim):
+        _concat_date_cols(self.object)
\ No newline at end of file

From 28fd5f51f5119dff43521d1d4451c9c11ae8b7ac Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 21 Mar 2019 14:23:28 +0300
Subject: [PATCH 09/42] Finish Cython version of _concat_date_cols; combine concat benchmarks

---
 asv_bench/benchmarks/io/parsers.py |  32 ++----
 pandas/_libs/lib.pyx               | 156 +++++++++--------------------
 2 files changed, 56 insertions(+), 132 deletions(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index a85dd79b6f3e0..fa06d1b24e436 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -21,32 +21,16 @@ def time_check_datetimes(self, value):
 
 class ConcatDateCols(object):
 
-    params = ([1234567890, 'AAAA'], [1, 2])
-    param_names = ['value', 'dim']
+    params = ([1234567890, 'AAAA'], [1, 2], [np.array, list])
+    param_names = ['value', 'dim', 'container']
 
-    def setup(self, value, dim):
-        count_elem = 1000000
+    def setup(self, value, dim, container):
+        count_elem = 10000
         if dim == 1:
-            self.object = (np.array([value] * count_elem),)
+            self.object = (container([value] * count_elem),)
         if dim == 2:
-            self.object = (np.array([value] * count_elem),
-                           np.array([value] * count_elem))
+            self.object = (container([value] * count_elem),
+                           container([value] * count_elem))
 
-    def time_check_concat(self, value, dim):
+    def time_check_concat(self, value, dim, container):
         _concat_date_cols(self.object)
-
-class ConcatDateColsList(object):
-
-    params = ([1234567890, 'AAAA'], [1, 2])
-    param_names = ['value', 'dim']
-
-    def setup(self, value, dim):
-        count_elem = 1000000
-        if dim == 1:
-            self.object = ([value] * count_elem,)
-        if dim == 2:
-            self.object = ([value] * count_elem,
-                           [value] * count_elem)
-
-    def time_check_concat(self, value, dim):
-        _concat_date_cols(self.object)
\ No newline at end of file
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 7f54931ce4203..5e125b3335cb1 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -8,10 +8,8 @@ import warnings
 import cython
 from cython import Py_ssize_t
 
-from cpython cimport (PyErr_SetString, Py_INCREF, PyTuple_SET_ITEM,
-                      PyTuple_New, PyObject_Str, PyList_SetItem,
-                      Py_EQ,
-                      PyObject_RichCompareBool,
+from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyTuple_New, PyObject_Str,
+                      Py_EQ, Py_SIZE, PyObject_RichCompareBool,
                       PyUnicode_Join, PyList_New)
 
 from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
@@ -21,13 +19,11 @@ PyDateTime_IMPORT
 
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_GETITEM, PyArray_CheckExact,
+from numpy cimport (ndarray, PyArray_GETITEM,
                     PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
-                    flatiter, NPY_OBJECT, PyArray_SETITEM,
-                    int64_t, PyArray_GETPTR1,
-                    float32_t, float64_t, npy_intp, PyArray_NDIM,
-                    uint8_t, uint64_t, PyArray_ZEROS,
-                    complex128_t)
+                    flatiter, NPY_OBJECT,
+                    int64_t, float32_t, float64_t,
+                    uint8_t, uint64_t, complex128_t)
 cnp.import_array()
 
 cdef extern from "numpy/arrayobject.h":
@@ -47,9 +43,6 @@ cdef extern from "numpy/arrayobject.h":
             object fields
             tuple names
 
-cdef extern from "Python.h":
-    object PyUnicode_FromFormat(const char *format, ...)
-
 
 cdef extern from "src/parse_helper.h":
     int floatify(object, float64_t *result, int *maybe_int) except -1
@@ -2320,126 +2313,73 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
     return maybe_convert_objects(output)
 
 
-cdef inline int convert_and_set_item(object item, Py_ssize_t index,
-                                     object result,
-                                     int keep_trivial_numbers):
+cdef inline void convert_and_set_item(object item, Py_ssize_t index,
+                                      object[:] result,
+                                      bint keep_trivial_numbers):
     cdef:
-        int do_convert = 1
-        object str_item
-        int int_item
-        double double_item
+        bint do_convert = 1
 
     if keep_trivial_numbers:
-        if isinstance(item, int):
-            int_item = item
-            if int_item == 0:
+        if isinstance(item, int) and Py_SIZE(item) < 2:
+            if <int>item == 0:
                 do_convert = 0
         elif isinstance(item, float):
-            double_item = item
-            if double_item == 0.0:
+            if <double>item == 0.0:
                 do_convert = 0
 
-    if do_convert:
-        if not isinstance(item, (str, bytes)):
-            str_item = PyObject_Str(item)
-            item = str_item
-
-    if PyArray_SETITEM(result, PyArray_GETPTR1(result, index), item):
-        PyErr_SetString(RuntimeError, "Cannot set resulting item")
-        return 0
+    if do_convert and not isinstance(item, (str, bytes)):
+        item = PyObject_Str(item)
 
-    return 1
+    result[index] = item
 
 
-cpdef int put_object_as_unicode(object list, Py_ssize_t idx, object item):
+cdef inline void put_object_as_unicode(object[:] lst, Py_ssize_t idx,
+                                       object item):
     if not isinstance(item, str):
         item = PyObject_Str(item)
-    Py_INCREF(item)
-    return 1 if PyList_SetItem(list, idx, item) == 0 else 0
+    lst[idx] = item
+
 
 cpdef object _concat_date_cols(object date_cols,
                                object keep_trivial_numbers=False):
     cdef:
-        object sequence
-        int keep_numbers, all_numpy = 1
-        Py_ssize_t sequence_size
-        Py_ssize_t array_size, min_array_size = 0
-        Py_ssize_t i, j
-        object result, arrays
-        object array, fast_array, item
-        npy_intp dims[1]
-        object separator
+        bint keep_numbers
+        Py_ssize_t sequence_size, i, j
+        Py_ssize_t array_size, min_size
+        object result
+        object separator = " "
         object list_to_join, result_string
+        object[:] list_view
+        object[:] result_view
+        object[:] iterator
+        object[::] arrays
 
-    sequence = date_cols
     keep_numbers = keep_trivial_numbers
     sequence_size = len(date_cols)
 
-    if sequence_size == -1:
-        return None
-    elif sequence_size == 0:
-        return np.zeros(0, dtype=object)
+    if sequence_size == 0:
+        result = np.zeros(0, dtype=object)
     elif sequence_size == 1:
-        array = sequence[0]
-        array_size = len(array)
-        dims[0] = array_size
-        result = PyArray_ZEROS(1, dims, NPY_OBJECT, 0)
-        if PyArray_CheckExact(array):
-            for i in range(array_size):
-                item = PyArray_GETITEM(array,
-                                       PyArray_GETPTR1(array, i))
-                if not convert_and_set_item(item, i, result, keep_numbers):
-                    raise RuntimeError
-        else:
-            if not isinstance(array, (tuple, list)):
-                fast_array = tuple(array)
-            else:
-                fast_array = array
-            for i in range(array_size):
-                item = fast_array[i]
-                if not convert_and_set_item(item, i, result, keep_numbers):
-                    raise RuntimeError
-
-        return result
+        iterator = date_cols[0]
+        array_size = len(iterator)
+        result = np.zeros(array_size, dtype=object)
+        result_view = result
+        for i in range(array_size):
+            convert_and_set_item(iterator[i], i, result_view, keep_numbers)
     else:
-        arrays = list(sequence)
-        for i in range(sequence_size):
-            array = arrays[i]
-            if PyArray_CheckExact(array):
-                if PyArray_NDIM(array) != 1:
-                    raise RuntimeError("ndarrays must be 1-dimentional")
-            elif not isinstance(array, (tuple, list)):
-                all_numpy = 0
-                fast_array = tuple(array)
-                array = fast_array
-            else:
-                all_numpy = 0
-            if len(array) < min_array_size or min_array_size == 0:
-                min_array_size = len(array)
-        dims[0] = min_array_size
-        result = PyArray_ZEROS(1, dims, NPY_OBJECT, 0)
+        arrays = date_cols
 
-        separator = PyUnicode_FromFormat(" ")
-        list_to_join = PyList_New(sequence_size)
+        min_size = min([len(arr) for arr in date_cols])
+        result = np.zeros(min_size, dtype=object)
+        result_view = result
 
-        for i in range(min_array_size):
-            if all_numpy:
-                for j in range(sequence_size):
-                    array = arrays[j]
-                    item = PyArray_GETITEM(array, PyArray_GETPTR1(array, i))
-                    if not put_object_as_unicode(list_to_join, j, item):
-                        raise RuntimeError
-            else:
-                for j in range(sequence_size):
-                    array = arrays[j]
-                    item = array[i]
-                    if not put_object_as_unicode(list_to_join, j, item):
-                        raise RuntimeError
+        list_to_join = PyList_New(sequence_size)
+        list_view = list_to_join
 
+        for i in range(min_size):
+            for j in range(sequence_size):
+                put_object_as_unicode(list_view, j, arrays[j][i])
             result_string = PyUnicode_Join(separator, list_to_join)
+            result_view[i] = result_string
 
-            if (PyArray_SETITEM(result, PyArray_GETPTR1(result, i),
-                                result_string) != 0):
-                raise RuntimeError("Cannot set resulting item")
-
-        return result
+    return result

From 1f17cf974a91f8e3613246df60533480f47d8321 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 21 Mar 2019 14:30:40 +0300
Subject: [PATCH 10/42] added forgotten check for float NaN

---
 pandas/_libs/lib.pyx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 5e125b3335cb1..a05656b741bc5 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2318,13 +2318,15 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
                                       bint keep_trivial_numbers):
     cdef:
         bint do_convert = 1
+        double double_item
 
     if keep_trivial_numbers:
         if isinstance(item, int) and Py_SIZE(item) < 2:
             if <int>item == 0:
                 do_convert = 0
         elif isinstance(item, float):
-            if <double>item == 0.0:
+            double_item = item
+            if double_item == 0.0 or double_item != double_item:
                 do_convert = 0
 
     if do_convert and not isinstance(item, (str, bytes)):

From d1f8ce5093a44efd73b6e6466f31ed9533a0d9fd Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Thu, 21 Mar 2019 17:50:15 +0300
Subject: [PATCH 11/42] Cython version of _concat_date_cols works for all cases

---
 pandas/_libs/lib.pyx | 69 ++++++++++++++++++++++++++++++++------------
 1 file changed, 50 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index a05656b741bc5..04a0bfd12e5e1 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -19,7 +19,7 @@ PyDateTime_IMPORT
 
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_GETITEM,
+from numpy cimport (ndarray, PyArray_GETITEM, PyArray_Check,
                     PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
                     flatiter, NPY_OBJECT,
                     int64_t, float32_t, float64_t,
@@ -2335,26 +2335,29 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
     result[index] = item
 
 
-cdef inline void put_object_as_unicode(object[:] lst, Py_ssize_t idx,
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
                                        object item):
     if not isinstance(item, str):
         item = PyObject_Str(item)
     lst[idx] = item
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
 cpdef object _concat_date_cols(object date_cols,
                                object keep_trivial_numbers=False):
     cdef:
         bint keep_numbers
         Py_ssize_t sequence_size, i, j
         Py_ssize_t array_size, min_size
-        object result
-        object separator = " "
-        object list_to_join, result_string
-        object[:] list_view
         object[:] result_view
-        object[:] iterator
-        object[::] arrays
+        object[:,:] arrays_view
+
+        object[:] obj_iter
+        int64_t[:] int_iter
+        float64_t[:] double_iter
 
     keep_numbers = keep_trivial_numbers
     sequence_size = len(date_cols)
@@ -2362,26 +2365,54 @@ cpdef object _concat_date_cols(object date_cols,
     if sequence_size == 0:
         result = np.zeros(0, dtype=object)
     elif sequence_size == 1:
-        iterator = date_cols[0]
-        array_size = len(iterator)
+        array = date_cols[0]
+        array_size = len(array)
         result = np.zeros(array_size, dtype=object)
         result_view = result
-        for i in range(array_size):
-            convert_and_set_item(iterator[i], i, result_view, keep_numbers)
+        if PyArray_Check(array):
+            if array.dtype == np.int64:
+                int_iter = array
+                for i in range(array_size):
+                    convert_and_set_item(int_iter[i], i,
+                                         result_view, keep_numbers)
+            elif array.dtype == np.float64:
+                double_iter = array
+                for i in range(array_size):
+                    convert_and_set_item(double_iter[i], i,
+                                         result_view, keep_numbers)
+            else:
+                if array.dtype == object:
+                    obj_iter = array
+                else:
+                    obj_array = np.astype(object)
+                    obj_iter = obj_array
+                for i in range(array_size):
+                    convert_and_set_item(obj_iter[i], i, result_view, keep_numbers)
+        else:
+            for i, item in enumerate(array):
+                convert_and_set_item(item, i, result_view, keep_numbers)
     else:
-        arrays = date_cols
-
         min_size = min([len(arr) for arr in date_cols])
+
+        arrays = np.zeros((len(date_cols), min_size), dtype=object)
+        for idx, array in enumerate(date_cols):
+            if PyArray_Check(array):
+                if array.dtype == object:
+                    arrays[idx] = array
+                else:
+                    arrays[idx] = array.astype(object)
+            else:
+                arrays[idx] = np.array(array, dtype=object)
+        arrays_view = arrays
+
         result = np.zeros(min_size, dtype=object)
         result_view = result
 
-        list_to_join = PyList_New(sequence_size)
-        list_view = list_to_join
+        list_to_join = [None] * sequence_size
 
         for i in range(min_size):
             for j in range(sequence_size):
-                put_object_as_unicode(list_view, j, arrays[j][i])
-            result_string = PyUnicode_Join(separator, list_to_join)
-            result_view[i] = result_string
+                put_object_as_unicode(list_to_join, j, arrays_view[j, i])
+            result_view[i] = PyUnicode_Join(' ', list_to_join)
 
     return result

From e44212c66b50d04c6f34d7f49575fdbb53722d6d Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Thu, 21 Mar 2019 18:21:31 +0300
Subject: [PATCH 12/42] Fix typo in _concat_date_cols

---
 pandas/_libs/lib.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 04a0bfd12e5e1..c576e0e0514a6 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2384,7 +2384,7 @@ cpdef object _concat_date_cols(object date_cols,
                 if array.dtype == object:
                     obj_iter = array
                 else:
-                    obj_array = np.astype(object)
+                    obj_array = array.astype(object)
                     obj_iter = obj_array
                 for i in range(array_size):
                     convert_and_set_item(obj_iter[i], i, result_view, keep_numbers)

From 6af73bf1a78cad4eec7c05c323269a56353651ba Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 22 Mar 2019 00:52:50 +0300
Subject: [PATCH 13/42] used flatiter for numpy array

---
 pandas/_libs/lib.pyx | 75 ++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 38 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c576e0e0514a6..d03c5075014d9 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2346,18 +2346,22 @@ cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cpdef object _concat_date_cols(object date_cols,
+cpdef object _concat_date_cols(tuple date_cols,
                                object keep_trivial_numbers=False):
     cdef:
         bint keep_numbers
         Py_ssize_t sequence_size, i, j
-        Py_ssize_t array_size, min_size
+        Py_ssize_t array_size, min_size = 0
         object[:] result_view
         object[:,:] arrays_view
 
-        object[:] obj_iter
-        int64_t[:] int_iter
-        float64_t[:] double_iter
+        flatiter it
+        int all_numpy = 1
+        cnp.ndarray[object] iters
+        object[::1] iters_view
+        object array
+        list list_to_join
+
 
     keep_numbers = keep_trivial_numbers
     sequence_size = len(date_cols)
@@ -2370,40 +2374,28 @@ cpdef object _concat_date_cols(object date_cols,
         result = np.zeros(array_size, dtype=object)
         result_view = result
         if PyArray_Check(array):
-            if array.dtype == np.int64:
-                int_iter = array
-                for i in range(array_size):
-                    convert_and_set_item(int_iter[i], i,
-                                         result_view, keep_numbers)
-            elif array.dtype == np.float64:
-                double_iter = array
-                for i in range(array_size):
-                    convert_and_set_item(double_iter[i], i,
-                                         result_view, keep_numbers)
-            else:
-                if array.dtype == object:
-                    obj_iter = array
-                else:
-                    obj_array = array.astype(object)
-                    obj_iter = obj_array
-                for i in range(array_size):
-                    convert_and_set_item(obj_iter[i], i, result_view, keep_numbers)
-        else:
-            for i, item in enumerate(array):
+            it = <flatiter>PyArray_IterNew(array)
+            for i in range(array_size):
+                item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
                 convert_and_set_item(item, i, result_view, keep_numbers)
+                PyArray_ITER_NEXT(it)
+        else:
+            for i in range(array_size):
+                convert_and_set_item(array[i], i, result_view, keep_numbers)
     else:
-        min_size = min([len(arr) for arr in date_cols])
+        for i in range(sequence_size):
+            array = date_cols[i]
+            if not PyArray_Check(array):
+                all_numpy = 0
+            if len(array) < min_size or min_size == 0:
+                min_size = len(array)
+
+        if all_numpy:
+            iters = np.zeros(sequence_size, dtype=object)
+            iters_view = iters
+            for i in range(sequence_size):
+                iters_view[i] = PyArray_IterNew(date_cols[i])
 
-        arrays = np.zeros((len(date_cols), min_size), dtype=object)
-        for idx, array in enumerate(date_cols):
-            if PyArray_Check(array):
-                if array.dtype == object:
-                    arrays[idx] = array
-                else:
-                    arrays[idx] = array.astype(object)
-            else:
-                arrays[idx] = np.array(array, dtype=object)
-        arrays_view = arrays
 
         result = np.zeros(min_size, dtype=object)
         result_view = result
@@ -2411,8 +2403,15 @@ cpdef object _concat_date_cols(object date_cols,
         list_to_join = [None] * sequence_size
 
         for i in range(min_size):
-            for j in range(sequence_size):
-                put_object_as_unicode(list_to_join, j, arrays_view[j, i])
+            if all_numpy:
+                for j in range(sequence_size):
+                    it = <flatiter>iters_view[j]
+                    item = PyArray_GETITEM(date_cols[j], PyArray_ITER_DATA(it))
+                    put_object_as_unicode(list_to_join, j, item)
+                    PyArray_ITER_NEXT(it)
+            else:
+                for j in range(sequence_size):
+                    put_object_as_unicode(list_to_join, j, date_cols[j][i])
             result_view[i] = PyUnicode_Join(' ', list_to_join)
 
     return result

From d4305a968b7dbdfbb880b51e7e043a6adf3ae281 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 22 Mar 2019 14:53:23 +0300
Subject: [PATCH 14/42] Fix Cython compilation issues

---
 pandas/_libs/tslibs/parsing.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index a2871ca353a23..87f194af85d03 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -332,12 +332,12 @@ cpdef bint _does_string_look_like_datetime(object date_string):
         return False
     if length >= 1:
         first = buf[0]
-        if first == '0':
+        if first == b'0':
             return True
-        elif length == 1 and date_string in _not_datelike_strings:
+        elif date_string in _not_datelike_strings:
             return False
         else:
-            converted_date = xstrtod(buf, &endptr, '.', 'e', '\0', 1)
+            converted_date = xstrtod(buf, &endptr, b'.', b'e', b'\0', 1)
             if errno == 0 and endptr == buf + length:
                 return converted_date >= 1000
 

From fa3ae05f62d43c7fad78af93930674f9793156e3 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 22 Mar 2019 15:35:58 +0300
Subject: [PATCH 15/42] Remove C version of _concat_date_cols

---
 .../_libs/tslibs/src/datetime/datehelpers.c   | 360 ------------------
 pandas/io/parsers.py                          |   1 -
 setup.py                                      |  15 -
 3 files changed, 376 deletions(-)
 delete mode 100644 pandas/_libs/tslibs/src/datetime/datehelpers.c

diff --git a/pandas/_libs/tslibs/src/datetime/datehelpers.c b/pandas/_libs/tslibs/src/datetime/datehelpers.c
deleted file mode 100644
index 174b88fe02468..0000000000000
--- a/pandas/_libs/tslibs/src/datetime/datehelpers.c
+++ /dev/null
@@ -1,360 +0,0 @@
-#include <Python.h>
-
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-#include <numpy/arrayobject.h>
-
-#include <string.h>
-
-#include "../../../src/inline_helper.h"
-#include "../../../src/parser/tokenizer.h"
-
-#if PY_MAJOR_VERSION >= 3
-    #define PY_STRING_CHECK(string) (PyUnicode_Check(string))
-#else
-    #define PY_STRING_CHECK(string) \
-        (PyString_Check(string) || PyUnicode_Check(string))
-#endif
-
-int PANDAS_INLINE convert_and_set_item(PyObject *item, Py_ssize_t index,
-                                       PyArrayObject *result,
-                                       int keep_trivial_numbers) {
-    int needs_decref = 0, do_convert = 1;
-    if (item == NULL) {
-        return 0;
-    }
-    if (keep_trivial_numbers) {
-        // don't convert an integer if it's zero,
-        // don't convert a float if it's zero or NaN
-#if PY_MAJOR_VERSION >= 3
-        if (PyLong_Check(item)) {
-            PyLongObject* v = (PyLongObject*)item;
-            switch (Py_SIZE(v)) {
-            case 0:
-                do_convert = 0;
-                break;
-            case 1:  // fallthrough
-            case -1:
-                if (v->ob_digit[0] == 0) {
-                    do_convert = 0;
-                }
-            }
-#else
-        if (PyInt_CheckExact(item)) {
-            if (((PyIntObject*)item)->ob_ival == 0) do_convert = 0;
-#endif
-        } else if (PyFloat_Check(item)) {
-            double v = PyFloat_AS_DOUBLE(item);
-            if (v == 0.0 || v != v) {
-                do_convert = 0;
-            }
-        }
-    }
-
-    if (do_convert) {
-        if (!PY_STRING_CHECK(item)) {
-            PyObject *str_item = PyObject_Str(item);
-            if (str_item == NULL) {
-                return 0;
-            }
-            item = str_item;
-            needs_decref = 1;
-        }
-    }
-    if (PyArray_SETITEM(result, PyArray_GETPTR1(result, index), item) != 0) {
-        PyErr_SetString(PyExc_RuntimeError, "Cannot set resulting item");
-        if (needs_decref) Py_DECREF(item);
-        return 0;
-    }
-    if (needs_decref) Py_DECREF(item);
-    return 1;
-}
-
-static int put_object_as_string(PyObject* list, Py_ssize_t idx,
-                                PyObject* item) {
-    if (!PY_STRING_CHECK(item)) {
-        PyObject* str_item = PyObject_Str(item);
-        if (str_item == NULL) {
-            return 0;
-        }
-        Py_DECREF(item);
-        item = str_item;
-    }
-    return (PyList_SetItem(list, idx, item) == 0) ? 1 : 0;
-}
-
-static PyObject* free_arrays(PyObject** arrays, Py_ssize_t size) {
-    PyObject** item = arrays;
-    Py_ssize_t i;
-    for (i = 0; i < size; ++i, ++item) Py_DECREF(*item);
-    free(arrays);
-    return NULL;
-}
-
-static PyObject* concat_date_cols(PyObject *self, PyObject *args,
-                                  PyObject *kwds) {
-    PyObject *sequence = NULL;
-    PyObject *py_keep_trivial_numbers = NULL;
-    PyArrayObject *result = NULL;
-    Py_ssize_t sequence_size = 0;
-    int keep_trivial_numbers;
-    char* kwlist[] = {"", "keep_trivial_numbers", NULL};
-
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O", kwlist,
-                                     &sequence, &py_keep_trivial_numbers)) {
-        return NULL;
-    }
-    if (!PySequence_Check(sequence)) {
-        PyErr_SetString(PyExc_TypeError, "argument must be sequence");
-        return NULL;
-    }
-    keep_trivial_numbers = (py_keep_trivial_numbers != NULL) ? \
-            PyObject_IsTrue(py_keep_trivial_numbers) : 0;
-
-    sequence_size = PySequence_Size(sequence);
-    if (sequence_size == -1) {
-        return NULL;
-    } else if (sequence_size == 0) {
-        npy_intp dims[1];
-        dims[0] = 0;
-        result = (PyArrayObject*)PyArray_ZEROS(1, dims, NPY_OBJECT, 0);
-        return (PyObject*)result;
-    } else if (sequence_size == 1) {
-        PyObject* array = PySequence_GetItem(sequence, 0);
-        Py_ssize_t array_size;
-        if (array == NULL) {
-            return NULL;
-        }
-
-        array_size = PySequence_Size(array);
-        if (array_size == -1) {
-            Py_DECREF(array);
-            return NULL;
-        }
-
-        {
-            npy_intp dims[1];
-            dims[0] = array_size;
-            result = (PyArrayObject*)PyArray_ZEROS(1, dims, NPY_OBJECT, 0);
-            if (result == NULL) {
-                Py_DECREF(array);
-                return NULL;
-            }
-        }
-
-        if (PyArray_CheckExact(array)) {
-            PyArrayObject *ndarray = (PyArrayObject*)array;
-            Py_ssize_t i;
-            for (i = 0; i < array_size; ++i) {
-                PyObject *item = PyArray_GETITEM(ndarray,
-                                                 PyArray_GETPTR1(ndarray, i));
-                if (!convert_and_set_item(item, i, result,
-                                          keep_trivial_numbers)) {
-                    Py_DECREF(result);
-                    Py_DECREF(array);
-                    Py_DECREF(item);
-                    return NULL;
-                }
-                Py_DECREF(item);
-            }
-        } else {
-            PyObject* fast_array = PySequence_Fast(array,
-                    "elements of input sequence must be sequence");
-            Py_ssize_t i;
-            if (fast_array == NULL) {
-                Py_DECREF(result);
-                Py_DECREF(array);
-                // PySequence_Fast set message, which in second argument
-                return NULL;
-            }
-
-            for (i = 0; i < array_size; ++i) {
-                PyObject* item = PySequence_Fast_GET_ITEM(fast_array, i);
-                if (!convert_and_set_item(item, i, result,
-                                          keep_trivial_numbers)) {
-                    Py_DECREF(result);
-                    Py_DECREF(array);
-                    Py_DECREF(fast_array);
-                    return NULL;
-                }
-            }
-            Py_DECREF(fast_array);
-        }
-        Py_DECREF(array);
-        return (PyObject*)result;
-    } else {
-        size_t mem_size = sizeof(PyObject*) * sequence_size;
-        PyObject **arrays = (PyObject**) malloc(mem_size);
-        PyObject *array = NULL;
-        PyObject **parray = NULL;
-        PyObject *fast_array = NULL;
-        PyObject *separator = NULL;
-        PyObject *item = NULL;
-        PyObject *list_to_join = NULL;
-        Py_ssize_t min_array_size = 0;
-        int all_numpy = 1;
-        Py_ssize_t i;
-        for (i = 0; i < sequence_size; ++i) {
-            array = PySequence_GetItem(sequence, i);
-            if (array == NULL) {
-                return free_arrays(arrays, i);
-            }
-            if (PyArray_CheckExact(array)) {
-                if (PyArray_NDIM((PyArrayObject*)array) != 1) {
-                    PyErr_SetString(PyExc_ValueError,
-                                    "ndarrays must be 1-dimentional");
-                    return free_arrays(arrays, i);
-                }
-            } else {
-                all_numpy = 0;
-            }
-            arrays[i] = array;
-        }
-
-        parray = arrays;
-        if (all_numpy) {
-            Py_ssize_t i;
-            for (i = 0; i < sequence_size; ++i, ++parray) {
-                Py_ssize_t array_size = PyArray_SIZE((PyArrayObject*)(*parray));
-
-                if (array_size < 0) {
-                    return free_arrays(arrays, sequence_size);
-                }
-
-                if (array_size < min_array_size || min_array_size == 0) {
-                    min_array_size = array_size;
-                }
-            }
-        } else {
-            Py_ssize_t i;
-            for (i = 0; i < sequence_size; ++i, ++parray) {
-                Py_ssize_t array_size;
-                fast_array = PySequence_Fast(*parray,
-                        "elements of input sequence must be sequence");
-                array_size = (fast_array != NULL) ? \
-                        PySequence_Fast_GET_SIZE(fast_array) : -1;
-
-                if (array_size < 0) {
-                    Py_XDECREF(fast_array);
-                    return free_arrays(arrays, sequence_size);
-                }
-                Py_DECREF(*parray);
-                arrays[i] = fast_array;
-
-                if (array_size < min_array_size || min_array_size == 0) {
-                    min_array_size = array_size;
-                }
-            }
-        }
-
-        {
-            npy_intp dims[1];
-            dims[0] = min_array_size;
-            result = (PyArrayObject*)PyArray_ZEROS(1, dims, NPY_OBJECT, 0);
-            if (result == NULL) {
-                return free_arrays(arrays, sequence_size);
-            }
-        }
-
-        separator = PyUnicode_FromFormat(" ");
-        if (separator == NULL) {
-            Py_DECREF(result);
-            return free_arrays(arrays, sequence_size);
-        }
-        list_to_join = PyList_New(sequence_size);
-        for (i = 0; i < min_array_size; ++i) {
-            PyObject *result_string = NULL;
-            parray = arrays;
-            if (all_numpy) {
-                Py_ssize_t j;
-                for (j = 0; j < sequence_size; ++j, ++parray) {
-                    PyArrayObject* arr = (PyArrayObject*)(*parray);
-                    item = PyArray_GETITEM(arr, PyArray_GETPTR1(arr, i));
-                    if (item == NULL) {
-                        Py_DECREF(list_to_join);
-                        Py_DECREF(result);
-                        return free_arrays(arrays, sequence_size);
-                    }
-                    if (!put_object_as_string(list_to_join, j, item)) {
-                        Py_DECREF(item);
-                        Py_DECREF(list_to_join);
-                        Py_DECREF(result);
-                        return free_arrays(arrays, sequence_size);
-                    }
-                }
-            } else {
-                Py_ssize_t j;
-                for (j = 0; j < sequence_size; ++j, ++parray) {
-                    item = PySequence_Fast_GET_ITEM(*parray, i);
-                    if (item == NULL) {
-                        Py_DECREF(list_to_join);
-                        Py_DECREF(result);
-                        return free_arrays(arrays, sequence_size);
-                    }
-                    Py_INCREF(item);
-                    if (!put_object_as_string(list_to_join, j, item)) {
-                        Py_DECREF(item);
-                        Py_DECREF(list_to_join);
-                        Py_DECREF(result);
-                        return free_arrays(arrays, sequence_size);
-                    }
-                }
-            }
-            result_string = PyUnicode_Join(separator, list_to_join);
-            if (result_string == NULL) {
-                Py_DECREF(list_to_join);
-                Py_DECREF(result);
-                return free_arrays(arrays, sequence_size);
-            }
-            if (PyArray_SETITEM(result, PyArray_GETPTR1(result, i),
-                                result_string) != 0) {
-                PyErr_SetString(PyExc_RuntimeError,
-                                "Cannot set resulting item");
-                Py_DECREF(list_to_join);
-                Py_DECREF(result);
-                Py_DECREF(result_string);
-                return free_arrays(arrays, sequence_size);
-            }
-            Py_DECREF(result_string);
-        }
-        Py_DECREF(list_to_join);
-        (void)free_arrays(arrays, sequence_size);
-        return (PyObject*)result;
-    }
-}
-
-static PyMethodDef module_methods[] = {
-    /* name from python, name in C-file, ..., __doc__ string of method */
-    {
-        "concat_date_cols", (PyCFunction)concat_date_cols,
-        METH_VARARGS | METH_KEYWORDS,
-        "concatenates date cols and returns numpy array"
-    },
-    {NULL, NULL, 0, NULL}
-};
-
-#if PY_MAJOR_VERSION >= 3
-static struct PyModuleDef moduledef = {
-    PyModuleDef_HEAD_INIT,
-    "datehelpers",                                   // name of module
-    "helpers for datetime structures manipulation",  // module documentation
-    -1,             // size of per-interpreter state of the module,
-                    // or -1 if the module keeps state in global variables.
-    module_methods
-};
-#define PY_DATEHELPERS_MODULE_INIT PyMODINIT_FUNC PyInit_datehelpers(void)
-#define PY_MODULE_CREATE PyModule_Create(&moduledef)
-#define PY_RETURN_MODULE return module
-#else
-#define PY_DATEHELPERS_MODULE_INIT void initdatehelpers(void)
-#define PY_MODULE_CREATE Py_InitModule("datehelpers", module_methods)
-#define PY_RETURN_MODULE
-#endif
-
-PY_DATEHELPERS_MODULE_INIT {
-    PyObject *module = NULL;
-    import_array();
-
-    module = PY_MODULE_CREATE;
-
-    PY_RETURN_MODULE;
-}
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 4e55a566723be..085abc60f06f9 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -13,7 +13,6 @@
 
 import numpy as np
 
-#from pandas._libs.datehelpers import concat_date_cols as _concat_date_cols
 from pandas._libs.lib import _concat_date_cols
 import pandas._libs.lib as lib
 import pandas._libs.ops as libops
diff --git a/setup.py b/setup.py
index 705fa0b24ddd4..2ad5cf5d919a3 100755
--- a/setup.py
+++ b/setup.py
@@ -243,7 +243,6 @@ def initialize_options(self):
         ujson_lib = pjoin(base, 'ujson', 'lib')
         self._clean_exclude = [pjoin(dt, 'np_datetime.c'),
                                pjoin(dt, 'np_datetime_strings.c'),
-                               pjoin(dt, 'datehelpers.c'),
                                pjoin(parser, 'tokenizer.c'),
                                pjoin(parser, 'io.c'),
                                pjoin(ujson_python, 'ujson.c'),
@@ -766,20 +765,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
 extensions.append(_move_ext)
 
 # ----------------------------------------------------------------------
-# datehelpers
-datehelpers_sources = [
-    'pandas/_libs/tslibs/src/datetime/datehelpers.c',
-    'pandas/_libs/src/parser/tokenizer.c'
-]
-datehelpers_ext = Extension('pandas._libs.datehelpers',
-                            depends=[
-                                'pandas/_libs/src/parser/tokenizer.h'
-                            ],
-                            sources=datehelpers_sources,
-                            include_dirs=['pandas/_libs/src/klib/'],
-                            extra_compile_args=extra_compile_args,
-                            define_macros=macros)
-extensions.append(datehelpers_ext)
 
 
 # The build cache system does string matching below this point.

From 49d66e0b5a54e8dd3d5a9175927ce6a9edeaad1d Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 22 Mar 2019 15:42:56 +0300
Subject: [PATCH 16/42] Fix linting errors

---
 pandas/_libs/lib.pyx | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index d03c5075014d9..46dd86523f84a 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2353,7 +2353,7 @@ cpdef object _concat_date_cols(tuple date_cols,
         Py_ssize_t sequence_size, i, j
         Py_ssize_t array_size, min_size = 0
         object[:] result_view
-        object[:,:] arrays_view
+        object[:, :] arrays_view
 
         flatiter it
         int all_numpy = 1
@@ -2362,7 +2362,6 @@ cpdef object _concat_date_cols(tuple date_cols,
         object array
         list list_to_join
 
-
     keep_numbers = keep_trivial_numbers
     sequence_size = len(date_cols)
 
@@ -2396,7 +2395,6 @@ cpdef object _concat_date_cols(tuple date_cols,
             for i in range(sequence_size):
                 iters_view[i] = PyArray_IterNew(date_cols[i])
 
-
         result = np.zeros(min_size, dtype=object)
         result_view = result
 

From 09e4da6cc2bcd01c04dcb14c512cf621ded7d82c Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 22 Mar 2019 08:20:12 -0500
Subject: [PATCH 17/42] Try to speed up 1D list

---
 pandas/_libs/lib.pyx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 46dd86523f84a..2316573305ef4 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2313,6 +2313,8 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
     return maybe_convert_objects(output)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
 cdef inline void convert_and_set_item(object item, Py_ssize_t index,
                                       object[:] result,
                                       bint keep_trivial_numbers):
@@ -2353,13 +2355,11 @@ cpdef object _concat_date_cols(tuple date_cols,
         Py_ssize_t sequence_size, i, j
         Py_ssize_t array_size, min_size = 0
         object[:] result_view
-        object[:, :] arrays_view
 
         flatiter it
         int all_numpy = 1
         cnp.ndarray[object] iters
         object[::1] iters_view
-        object array
         list list_to_join
 
     keep_numbers = keep_trivial_numbers
@@ -2379,8 +2379,8 @@ cpdef object _concat_date_cols(tuple date_cols,
                 convert_and_set_item(item, i, result_view, keep_numbers)
                 PyArray_ITER_NEXT(it)
         else:
-            for i in range(array_size):
-                convert_and_set_item(array[i], i, result_view, keep_numbers)
+            for i, item in enumerate(array):
+                convert_and_set_item(item, i, result_view, keep_numbers)
     else:
         for i in range(sequence_size):
             array = date_cols[i]

From 67d9509d1f9ebffd4c112e038757f26aceb3e88a Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 22 Mar 2019 08:44:03 -0500
Subject: [PATCH 18/42] Hopefully speed up 2D case

---
 pandas/_libs/lib.pyx | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 2316573305ef4..c74e35bc1e255 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2382,8 +2382,7 @@ cpdef object _concat_date_cols(tuple date_cols,
             for i, item in enumerate(array):
                 convert_and_set_item(item, i, result_view, keep_numbers)
     else:
-        for i in range(sequence_size):
-            array = date_cols[i]
+        for i, array in enumerate(date_cols):
             if not PyArray_Check(array):
                 all_numpy = 0
             if len(array) < min_size or min_size == 0:
@@ -2392,24 +2391,26 @@ cpdef object _concat_date_cols(tuple date_cols,
         if all_numpy:
             iters = np.zeros(sequence_size, dtype=object)
             iters_view = iters
-            for i in range(sequence_size):
-                iters_view[i] = PyArray_IterNew(date_cols[i])
+            for i, array in enumerate(date_cols):
+                iters_view[i] = PyArray_IterNew(array)
 
         result = np.zeros(min_size, dtype=object)
         result_view = result
 
         list_to_join = [None] * sequence_size
 
-        for i in range(min_size):
-            if all_numpy:
-                for j in range(sequence_size):
+        if all_numpy:
+            for i in range(min_size):
+                for j, array in enumerate(date_cols):
                     it = <flatiter>iters_view[j]
-                    item = PyArray_GETITEM(date_cols[j], PyArray_ITER_DATA(it))
+                    item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
                     put_object_as_unicode(list_to_join, j, item)
                     PyArray_ITER_NEXT(it)
-            else:
-                for j in range(sequence_size):
-                    put_object_as_unicode(list_to_join, j, date_cols[j][i])
-            result_view[i] = PyUnicode_Join(' ', list_to_join)
+                result_view[i] = PyUnicode_Join(' ', list_to_join)
+        else:
+            for i in range(min_size):
+                for j, array in enumerate(date_cols):
+                    put_object_as_unicode(list_to_join, j, array[i])
+                result_view[i] = PyUnicode_Join(' ', list_to_join)
 
     return result

From f05564d619f5dd726dd41e275eb506f0af234523 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 22 Mar 2019 09:06:04 -0500
Subject: [PATCH 19/42] Fix isort, retain some comments

---
 asv_bench/benchmarks/io/parsers.py | 4 +++-
 pandas/_libs/tslibs/parsing.pyx    | 2 ++
 pandas/io/parsers.py               | 8 ++------
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index fa06d1b24e436..f453705c5f859 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -1,6 +1,8 @@
+import numpy as np
+
 from pandas._libs.tslibs.parsing import _does_string_look_like_datetime
+
 from pandas.io.parsers import _concat_date_cols
-import numpy as np
 
 
 class DoesStringLookLikeDatetime(object):
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 87f194af85d03..2c41c569693a7 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -333,6 +333,8 @@ cpdef bint _does_string_look_like_datetime(object date_string):
     if length >= 1:
         first = buf[0]
         if first == b'0':
+            # Strings starting with 0 are more consistent with a
+            # date-like string than a number
             return True
         elif date_string in _not_datelike_strings:
             return False
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 085abc60f06f9..2f8aa29162a24 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -13,8 +13,8 @@
 
 import numpy as np
 
-from pandas._libs.lib import _concat_date_cols
 import pandas._libs.lib as lib
+from pandas._libs.lib import _concat_date_cols
 import pandas._libs.ops as libops
 import pandas._libs.parsers as parsers
 from pandas._libs.tslibs import parsing
@@ -3217,11 +3217,7 @@ def converter(*date_cols):
             except Exception:
                 try:
                     return tools.to_datetime(
-                        parsing.try_parse_dates(
-                            _concat_date_cols(
-                                date_cols,
-                                keep_trivial_numbers=True
-                            ),
+                        parsing.try_parse_dates(_concat_date_cols(date_cols),
                                                 parser=date_parser,
                                                 dayfirst=dayfirst),
                         cache=cache_dates,

From b9c96fdd10bfb76e9cab95bab49e026c7286ac2f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 25 Mar 2019 19:01:01 +0300
Subject: [PATCH 20/42] removed unnecessary common_include list with headers;
 some code style changes

---
 pandas/_libs/lib.pyx | 29 +++++++++++++----------------
 setup.py             |  1 -
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c74e35bc1e255..e6f853258a2c1 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2349,22 +2349,17 @@ cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cpdef object _concat_date_cols(tuple date_cols,
-                               object keep_trivial_numbers=False):
+                               bint keep_trivial_numbers=False):
     cdef:
-        bint keep_numbers
-        Py_ssize_t sequence_size, i, j
+        Py_ssize_t i, j, sequence_size = len(date_cols)
         Py_ssize_t array_size, min_size = 0
         object[:] result_view
-
         flatiter it
         int all_numpy = 1
         cnp.ndarray[object] iters
         object[::1] iters_view
         list list_to_join
 
-    keep_numbers = keep_trivial_numbers
-    sequence_size = len(date_cols)
-
     if sequence_size == 0:
         result = np.zeros(0, dtype=object)
     elif sequence_size == 1:
@@ -2373,33 +2368,35 @@ cpdef object _concat_date_cols(tuple date_cols,
         result = np.zeros(array_size, dtype=object)
         result_view = result
         if PyArray_Check(array):
+            # for numpy array case use special api for performance
             it = <flatiter>PyArray_IterNew(array)
             for i in range(array_size):
                 item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-                convert_and_set_item(item, i, result_view, keep_numbers)
+                convert_and_set_item(item, i, result_view, keep_trivial_numbers)
                 PyArray_ITER_NEXT(it)
         else:
             for i, item in enumerate(array):
-                convert_and_set_item(item, i, result_view, keep_numbers)
+                convert_and_set_item(item, i, result_view, keep_trivial_numbers)
     else:
         for i, array in enumerate(date_cols):
             if not PyArray_Check(array):
                 all_numpy = 0
+            # find min length for arrays in date_cols
+            # imitation python zip behavior
             if len(array) < min_size or min_size == 0:
                 min_size = len(array)
 
-        if all_numpy:
-            iters = np.zeros(sequence_size, dtype=object)
-            iters_view = iters
-            for i, array in enumerate(date_cols):
-                iters_view[i] = PyArray_IterNew(array)
-
         result = np.zeros(min_size, dtype=object)
         result_view = result
-
         list_to_join = [None] * sequence_size
 
         if all_numpy:
+            # setup iterators
+            iters = np.zeros(sequence_size, dtype=object)
+            iters_view = iters
+            for i, array in enumerate(date_cols):
+                iters_view[i] = PyArray_IterNew(array)
+            # for numpy array case use special api for performance
             for i in range(min_size):
                 for j, array in enumerate(date_cols):
                     it = <flatiter>iters_view[j]
diff --git a/setup.py b/setup.py
index 2ad5cf5d919a3..0dbf93ec925e0 100755
--- a/setup.py
+++ b/setup.py
@@ -634,7 +634,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
         'sources': np_datetime_sources},
     '_libs.tslibs.parsing': {
         'pyxfile': '_libs/tslibs/parsing',
-        'include': common_include,
         'depends': ['pandas/_libs/src/parser/tokenizer.h'],
         'sources': ['pandas/_libs/src/parser/tokenizer.c']},
     '_libs.tslibs.period': {

From 6dc3c5127fe4e3f806e8baf446a85b9935f7a77b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 27 Mar 2019 21:54:13 +0300
Subject: [PATCH 21/42] using util.is_array now; changed double to float64_t;
 fix docstring

---
 doc/source/whatsnew/v0.25.0.rst |  2 ++
 pandas/_libs/lib.pyx            | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 578e24009d35a..61d3a8f8ed517 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -252,6 +252,8 @@ Performance Improvements
 - Improved performance of :meth:`read_csv` by much faster parsing of ``MM/YYYY`` and ``DD/MM/YYYY`` datetime formats (:issue:`25922`)
 - Improved performance of nanops for dtypes that cannot store NaNs. Speedup is particularly prominent for :meth:`Series.all` and :meth:`Series.any` (:issue:`25070`)
 - Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`)
+- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero
+  and float NaN; by faster checking the string for the possibility of being a date (:issue:`25754`)
 
 .. _whatsnew_0250.bug_fixes:
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e6f853258a2c1..fd6217e400de8 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -19,7 +19,7 @@ PyDateTime_IMPORT
 
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_GETITEM, PyArray_Check,
+from numpy cimport (ndarray, PyArray_GETITEM,
                     PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
                     flatiter, NPY_OBJECT,
                     int64_t, float32_t, float64_t,
@@ -2320,15 +2320,15 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
                                       bint keep_trivial_numbers):
     cdef:
         bint do_convert = 1
-        double double_item
+        float64_t float_item
 
     if keep_trivial_numbers:
         if isinstance(item, int) and Py_SIZE(item) < 2:
             if <int>item == 0:
                 do_convert = 0
         elif isinstance(item, float):
-            double_item = item
-            if double_item == 0.0 or double_item != double_item:
+            float_item = item
+            if float_item == 0.0 or float_item != float_item:
                 do_convert = 0
 
     if do_convert and not isinstance(item, (str, bytes)):
@@ -2367,7 +2367,7 @@ cpdef object _concat_date_cols(tuple date_cols,
         array_size = len(array)
         result = np.zeros(array_size, dtype=object)
         result_view = result
-        if PyArray_Check(array):
+        if util.is_array(array):
             # for numpy array case use special api for performance
             it = <flatiter>PyArray_IterNew(array)
             for i in range(array_size):
@@ -2379,7 +2379,7 @@ cpdef object _concat_date_cols(tuple date_cols,
                 convert_and_set_item(item, i, result_view, keep_trivial_numbers)
     else:
         for i, array in enumerate(date_cols):
-            if not PyArray_Check(array):
+            if not util.is_array(array):
                 all_numpy = 0
             # find min length for arrays in date_cols
             # imitation python zip behavior

From 08c7f476e2733fe3a6687aa9ecd8198896cbb7c4 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 28 Mar 2019 00:57:30 +0300
Subject: [PATCH 22/42] split _concat_date_cols functionality

---
 pandas/_libs/lib.pyx | 117 +++++++++++++++++++++++++------------------
 1 file changed, 67 insertions(+), 50 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index fd6217e400de8..c7977bed86e9a 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2348,66 +2348,83 @@ cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cpdef object _concat_date_cols(tuple date_cols,
-                               bint keep_trivial_numbers=False):
+cdef void concat_date_cols_numpy(tuple date_cols, object[:] result_view,
+                                 Py_ssize_t min_size,
+                                 bint keep_trivial_numbers=False):
     cdef:
         Py_ssize_t i, j, sequence_size = len(date_cols)
-        Py_ssize_t array_size, min_size = 0
-        object[:] result_view
-        flatiter it
-        int all_numpy = 1
+        list list_to_join
         cnp.ndarray[object] iters
         object[::1] iters_view
-        list list_to_join
+        flatiter it
 
-    if sequence_size == 0:
-        result = np.zeros(0, dtype=object)
-    elif sequence_size == 1:
+    if sequence_size == 1:
         array = date_cols[0]
-        array_size = len(array)
-        result = np.zeros(array_size, dtype=object)
-        result_view = result
-        if util.is_array(array):
-            # for numpy array case use special api for performance
-            it = <flatiter>PyArray_IterNew(array)
-            for i in range(array_size):
+        it = <flatiter>PyArray_IterNew(array)
+        for i in range(min_size):
+            item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
+            convert_and_set_item(item, i, result_view, keep_trivial_numbers)
+            PyArray_ITER_NEXT(it)
+    else:
+        list_to_join = [None] * sequence_size
+        # setup iterators
+        iters = np.zeros(sequence_size, dtype=object)
+        iters_view = iters
+        for i, array in enumerate(date_cols):
+            iters_view[i] = PyArray_IterNew(array)
+        for i in range(min_size):
+            for j, array in enumerate(date_cols):
+                it = <flatiter>iters_view[j]
                 item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-                convert_and_set_item(item, i, result_view, keep_trivial_numbers)
+                put_object_as_unicode(list_to_join, j, item)
                 PyArray_ITER_NEXT(it)
-        else:
-            for i, item in enumerate(array):
-                convert_and_set_item(item, i, result_view, keep_trivial_numbers)
+            result_view[i] = PyUnicode_Join(' ', list_to_join)
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cdef void concat_date_cols_sequence(tuple date_cols, object[:] result_view,
+                                    Py_ssize_t min_size,
+                                    bint keep_trivial_numbers=False):
+    cdef:
+        Py_ssize_t i, j, sequence_size = len(date_cols)
+        list list_to_join
+
+    if sequence_size == 1:
+        for i, item in enumerate(date_cols[0]):
+            convert_and_set_item(item, i, result_view, keep_trivial_numbers)
     else:
-        for i, array in enumerate(date_cols):
-            if not util.is_array(array):
-                all_numpy = 0
-            # find min length for arrays in date_cols
-            # imitation python zip behavior
-            if len(array) < min_size or min_size == 0:
-                min_size = len(array)
-
-        result = np.zeros(min_size, dtype=object)
-        result_view = result
         list_to_join = [None] * sequence_size
+        for i in range(min_size):
+            for j, array in enumerate(date_cols):
+                put_object_as_unicode(list_to_join, j, array[i])
+            result_view[i] = PyUnicode_Join(' ', list_to_join)
 
-        if all_numpy:
-            # setup iterators
-            iters = np.zeros(sequence_size, dtype=object)
-            iters_view = iters
-            for i, array in enumerate(date_cols):
-                iters_view[i] = PyArray_IterNew(array)
-            # for numpy array case use special api for performance
-            for i in range(min_size):
-                for j, array in enumerate(date_cols):
-                    it = <flatiter>iters_view[j]
-                    item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-                    put_object_as_unicode(list_to_join, j, item)
-                    PyArray_ITER_NEXT(it)
-                result_view[i] = PyUnicode_Join(' ', list_to_join)
-        else:
-            for i in range(min_size):
-                for j, array in enumerate(date_cols):
-                    put_object_as_unicode(list_to_join, j, array[i])
-                result_view[i] = PyUnicode_Join(' ', list_to_join)
 
+cpdef object _concat_date_cols(tuple date_cols,
+                               bint keep_trivial_numbers=False):
+    cdef:
+        Py_ssize_t min_size = 0, sequence_size = len(date_cols)
+        cnp.ndarray[object] result
+        int all_numpy = 1
+
+    if sequence_size == 0:
+        return np.zeros(0, dtype=object)
+
+    for i, array in enumerate(date_cols):
+        if not util.is_array(array):
+            all_numpy = 0
+        # find min length for arrays in date_cols
+        # imitation python zip behavior
+        if len(array) < min_size or min_size == 0:
+            min_size = len(array)
+
+    result = np.zeros(min_size, dtype=object)
+    if all_numpy:
+        # call special function to increase performance
+        concat_date_cols_numpy(date_cols, result, min_size,
+                               keep_trivial_numbers)
+    else:
+        concat_date_cols_sequence(date_cols, result, min_size,
+                                  keep_trivial_numbers)
     return result

From ba6b86a66794e99f8975af3f6133c12978c7dd7d Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 28 Mar 2019 15:37:00 +0300
Subject: [PATCH 23/42] added error parameter for xstrtod call

---
 pandas/_libs/tslibs/parsing.pyx | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 2c41c569693a7..38bafe8c447f0 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -34,7 +34,7 @@ cdef extern from "../src/headers/portable.h":
 
 cdef extern from "../src/parser/tokenizer.h":
     double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
-                   int skip_trailing)
+                   int skip_trailing, int *error)
 
 cdef extern from *:
     char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t* length)
@@ -327,6 +327,7 @@ cpdef bint _does_string_look_like_datetime(object date_string):
         Py_ssize_t length = -1
         double converted_date
         char first
+        int error = 0
 
     if not get_string_data(date_string, &buf, &length):
         return False
@@ -339,8 +340,9 @@ cpdef bint _does_string_look_like_datetime(object date_string):
         elif date_string in _not_datelike_strings:
             return False
         else:
-            converted_date = xstrtod(buf, &endptr, b'.', b'e', b'\0', 1)
-            if errno == 0 and endptr == buf + length:
+            converted_date = xstrtod(buf, &endptr,
+                                     b'.', b'e', b'\0', 1, &error)
+            if error == 0 and endptr == buf + length:
                 return converted_date >= 1000
 
     return True

From 14b9cad42a7bd7f40a3570ab8b5842ac81161c00 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 28 Mar 2019 16:03:17 +0300
Subject: [PATCH 24/42] removed Py_SIZE; renamed indexes

---
 pandas/_libs/lib.pyx | 93 +++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 49 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c7977bed86e9a..ec35518d3c745 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2323,7 +2323,7 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
         float64_t float_item
 
     if keep_trivial_numbers:
-        if isinstance(item, int) and Py_SIZE(item) < 2:
+        if isinstance(item, int):
             if <int>item == 0:
                 do_convert = 0
         elif isinstance(item, float):
@@ -2348,83 +2348,78 @@ cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef void concat_date_cols_numpy(tuple date_cols, object[:] result_view,
-                                 Py_ssize_t min_size,
-                                 bint keep_trivial_numbers=False):
+cdef void _concat_date_cols_numpy(tuple date_cols, object[:] result_view,
+                                  Py_ssize_t rows_count, Py_ssize_t col_count,
+                                  bint keep_trivial_numbers):
     cdef:
-        Py_ssize_t i, j, sequence_size = len(date_cols)
+        Py_ssize_t col_idx, row_idx
         list list_to_join
         cnp.ndarray[object] iters
         object[::1] iters_view
         flatiter it
 
-    if sequence_size == 1:
+    if col_count == 1:
         array = date_cols[0]
         it = <flatiter>PyArray_IterNew(array)
-        for i in range(min_size):
+        for row_idx in range(rows_count):
             item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-            convert_and_set_item(item, i, result_view, keep_trivial_numbers)
+            convert_and_set_item(item, row_idx, result_view,
+                                 keep_trivial_numbers)
             PyArray_ITER_NEXT(it)
     else:
-        list_to_join = [None] * sequence_size
+        list_to_join = [None] * col_count
         # setup iterators
-        iters = np.zeros(sequence_size, dtype=object)
+        iters = np.zeros(col_count, dtype=object)
         iters_view = iters
-        for i, array in enumerate(date_cols):
-            iters_view[i] = PyArray_IterNew(array)
-        for i in range(min_size):
-            for j, array in enumerate(date_cols):
-                it = <flatiter>iters_view[j]
+        for col_idx, array in enumerate(date_cols):
+            iters_view[col_idx] = PyArray_IterNew(array)
+        for row_idx in range(rows_count):
+            for col_idx, array in enumerate(date_cols):
+                it = <flatiter>iters_view[col_idx]
                 item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-                put_object_as_unicode(list_to_join, j, item)
+                put_object_as_unicode(list_to_join, col_idx, item)
                 PyArray_ITER_NEXT(it)
-            result_view[i] = PyUnicode_Join(' ', list_to_join)
+            result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef void concat_date_cols_sequence(tuple date_cols, object[:] result_view,
-                                    Py_ssize_t min_size,
-                                    bint keep_trivial_numbers=False):
+cdef void _concat_date_cols_sequence(tuple date_cols, object[:] result_view,
+                                     Py_ssize_t rows_count,
+                                     Py_ssize_t col_count,
+                                     bint keep_trivial_numbers):
     cdef:
-        Py_ssize_t i, j, sequence_size = len(date_cols)
+        Py_ssize_t col_idx, row_idx
         list list_to_join
 
-    if sequence_size == 1:
-        for i, item in enumerate(date_cols[0]):
-            convert_and_set_item(item, i, result_view, keep_trivial_numbers)
+    if col_count == 1:
+        for row_idx, item in enumerate(date_cols[0]):
+            convert_and_set_item(item, row_idx, result_view,
+                                 keep_trivial_numbers)
     else:
-        list_to_join = [None] * sequence_size
-        for i in range(min_size):
-            for j, array in enumerate(date_cols):
-                put_object_as_unicode(list_to_join, j, array[i])
-            result_view[i] = PyUnicode_Join(' ', list_to_join)
+        list_to_join = [None] * col_count
+        for row_idx in range(rows_count):
+            for col_idx, array in enumerate(date_cols):
+                put_object_as_unicode(list_to_join, col_idx, array[row_idx])
+            result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
 
-cpdef object _concat_date_cols(tuple date_cols,
-                               bint keep_trivial_numbers=False):
+def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
     cdef:
-        Py_ssize_t min_size = 0, sequence_size = len(date_cols)
+        Py_ssize_t rows_count = 0, col_count = len(date_cols)
         cnp.ndarray[object] result
-        int all_numpy = 1
 
-    if sequence_size == 0:
+    if col_count == 0:
         return np.zeros(0, dtype=object)
 
-    for i, array in enumerate(date_cols):
-        if not util.is_array(array):
-            all_numpy = 0
-        # find min length for arrays in date_cols
-        # imitation python zip behavior
-        if len(array) < min_size or min_size == 0:
-            min_size = len(array)
-
-    result = np.zeros(min_size, dtype=object)
-    if all_numpy:
-        # call special function to increase performance
-        concat_date_cols_numpy(date_cols, result, min_size,
-                               keep_trivial_numbers)
+    rows_count = min(len(array) for array in date_cols)
+
+    result = np.zeros(rows_count, dtype=object)
+    if all(util.is_array(array) for array in date_cols):
+        # call specialized function to increase performance
+        _concat_date_cols_numpy(date_cols, result, rows_count, col_count,
+                                keep_trivial_numbers)
     else:
-        concat_date_cols_sequence(date_cols, result, min_size,
-                                  keep_trivial_numbers)
+        _concat_date_cols_sequence(date_cols, result, rows_count, col_count,
+                                   keep_trivial_numbers)
     return result

From 4e9211b8ff8f564f69dbf0cdfc923bac6a4405ed Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 29 Mar 2019 18:55:36 +0300
Subject: [PATCH 25/42] Switch to helper method for getting C buffer of string
 object

---
 pandas/_libs/tslibs/parsing.pyx | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 38bafe8c447f0..fe1121f0efa66 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -6,7 +6,6 @@ import time
 from io import StringIO
 
 from libc.string cimport strchr
-from cpython cimport PyUnicode_Check, PyBytes_Check, PyBytes_AsStringAndSize
 
 from cpython.datetime cimport datetime, datetime_new, import_datetime
 from cpython.version cimport PY_VERSION_HEX
@@ -36,16 +35,6 @@ cdef extern from "../src/parser/tokenizer.h":
     double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
                    int skip_trailing, int *error)
 
-cdef extern from *:
-    char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t* length)
-
-cdef inline bint get_string_data(object s, char **buf, Py_ssize_t *length):
-    if PyUnicode_Check(s):
-        buf[0] = PyUnicode_AsUTF8AndSize(s, length)
-        return buf[0] != NULL
-    if PyBytes_Check(s):
-        return PyBytes_AsStringAndSize(s, buf, length) == 0
-    return False
 
 # ----------------------------------------------------------------------
 # Constants
@@ -322,15 +311,14 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
 
 cpdef bint _does_string_look_like_datetime(object date_string):
     cdef:
-        char *buf = NULL
+        const char *buf
         char *endptr = NULL
         Py_ssize_t length = -1
         double converted_date
         char first
         int error = 0
 
-    if not get_string_data(date_string, &buf, &length):
-        return False
+    buf = get_c_string_buf_and_size(date_string, &length)
     if length >= 1:
         first = buf[0]
         if first == b'0':

From 0aefa7bccde6a57cbdecfe48c4d9560c1c8116b6 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 31 Mar 2019 22:50:44 +0300
Subject: [PATCH 26/42] changed return type in _concat_date_cols_* functions
 from void to cnp.ndarray[object]

---
 pandas/_libs/lib.pyx | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index ec35518d3c745..e47c890b93f3f 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2348,15 +2348,21 @@ cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef void _concat_date_cols_numpy(tuple date_cols, object[:] result_view,
-                                  Py_ssize_t rows_count, Py_ssize_t col_count,
-                                  bint keep_trivial_numbers):
+cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
+                                                 Py_ssize_t rows_count,
+                                                 Py_ssize_t col_count,
+                                                 bint keep_trivial_numbers):
     cdef:
         Py_ssize_t col_idx, row_idx
         list list_to_join
         cnp.ndarray[object] iters
         object[::1] iters_view
         flatiter it
+        cnp.ndarray[object] result
+        object[:] result_view
+
+    result = np.zeros(rows_count, dtype=object)
+    result_view = result
 
     if col_count == 1:
         array = date_cols[0]
@@ -2381,16 +2387,23 @@ cdef void _concat_date_cols_numpy(tuple date_cols, object[:] result_view,
                 PyArray_ITER_NEXT(it)
             result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
+    return result
+
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef void _concat_date_cols_sequence(tuple date_cols, object[:] result_view,
-                                     Py_ssize_t rows_count,
-                                     Py_ssize_t col_count,
-                                     bint keep_trivial_numbers):
+cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
+                                                    Py_ssize_t rows_count,
+                                                    Py_ssize_t col_count,
+                                                    bint keep_trivial_numbers):
     cdef:
         Py_ssize_t col_idx, row_idx
         list list_to_join
+        cnp.ndarray[object] result
+        object[:] result_view
+
+    result = np.zeros(rows_count, dtype=object)
+    result_view = result
 
     if col_count == 1:
         for row_idx, item in enumerate(date_cols[0]):
@@ -2403,6 +2416,8 @@ cdef void _concat_date_cols_sequence(tuple date_cols, object[:] result_view,
                 put_object_as_unicode(list_to_join, col_idx, array[row_idx])
             result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
+    return result
+
 
 def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
     cdef:
@@ -2414,12 +2429,11 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
 
     rows_count = min(len(array) for array in date_cols)
 
-    result = np.zeros(rows_count, dtype=object)
     if all(util.is_array(array) for array in date_cols):
         # call specialized function to increase performance
-        _concat_date_cols_numpy(date_cols, result, rows_count, col_count,
-                                keep_trivial_numbers)
+        result = _concat_date_cols_numpy(date_cols, rows_count, col_count,
+                                         keep_trivial_numbers)
     else:
-        _concat_date_cols_sequence(date_cols, result, rows_count, col_count,
-                                   keep_trivial_numbers)
+        result = _concat_date_cols_sequence(date_cols, rows_count, col_count,
+                                            keep_trivial_numbers)
     return result

From a3a0a7776f2f25a2f88c5f038118a32c77242e5b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 31 Mar 2019 23:46:42 +0300
Subject: [PATCH 27/42] added doc-string to _concat_date_cols* functions

---
 pandas/_libs/lib.pyx | 61 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e47c890b93f3f..cee664d8a84cd 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2352,6 +2352,28 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
                                                  Py_ssize_t rows_count,
                                                  Py_ssize_t col_count,
                                                  bint keep_trivial_numbers):
+    """
+    Concatenates the first `rows_count` elements from each of the `col_count`
+    numpy arrays in `date_cols` into strings.
+
+    Notes
+    -----
+    This function speeds up concatenation for numpy arrays.
+    You can also use the more general `_concat_date_cols_sequence` function.
+
+    Parameters
+    ----------
+    date_cols : tuple of numpy arrays
+    rows_count : Py_ssize_t
+    col_count : Py_ssize_t
+    keep_trivial_numbers : bool, default False
+        If True, then for the case of one sequence in `date_cols`,
+        conversion (to string from integer/float zero) is not performed
+
+    Returns
+    -------
+    arr_of_rows : 1-d numpy array
+    """
     cdef:
         Py_ssize_t col_idx, row_idx
         list list_to_join
@@ -2396,6 +2418,23 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
                                                     Py_ssize_t rows_count,
                                                     Py_ssize_t col_count,
                                                     bint keep_trivial_numbers):
+    """
+    Concatenates the first `rows_count` elements from each of the `col_count`
+    sequences in `date_cols` into strings.
+
+    Parameters
+    ----------
+    date_cols : tuple of sequences
+    rows_count : Py_ssize_t
+    col_count : Py_ssize_t
+    keep_trivial_numbers : bool, default False
+        If True, then for the case of one sequence in `date_cols`,
+        conversion (to string from integer/float zero) is not performed
+
+    Returns
+    -------
+    arr_of_rows : 1-d numpy array
+    """
     cdef:
         Py_ssize_t col_idx, row_idx
         list list_to_join
@@ -2420,6 +2459,28 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
 
 
 def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
+    """
+    Concatenates elements from sequences in `date_cols` into strings.
+
+    Parameters
+    ----------
+    date_cols : tuple of sequences
+    keep_trivial_numbers : bool, default False
+        If True, then for the case of one sequence in `date_cols`,
+        conversion (to string from integer/float zero) is not performed
+
+    Returns
+    -------
+    arr_of_rows : 1-d numpy array
+
+    Examples
+    --------
+    >>> dates = np.array(['3/31/2019', '4/30/2019'], dtype=object)
+    >>> times = np.array(['11:20', '10:45'], dtype=object)
+    >>> result = _concat_date_cols((dates, times))
+    >>> result
+    array(['3/31/2019 11:20', '4/30/2019 10:45'], dtype=object)
+    """
     cdef:
         Py_ssize_t rows_count = 0, col_count = len(date_cols)
         cnp.ndarray[object] result

From f1ae23cb4e3a135d244cc4015b1cadabf2343c47 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 1 Apr 2019 15:11:36 +0300
Subject: [PATCH 28/42] added doc-string for convert_and_set_item func; removed
 isinstance(item, bytes) check

---
 pandas/_libs/lib.pyx | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index cee664d8a84cd..cf430303ecd3b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2318,6 +2318,17 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
 cdef inline void convert_and_set_item(object item, Py_ssize_t index,
                                       object[:] result,
                                       bint keep_trivial_numbers):
+    """
+    Convert `item` to str and set into result[index].
+
+    Parameters
+    ----------
+    item : object
+    index : Py_ssize_t
+    keep_trivial_numbers : bool, default False
+        If `keep_trivial_numbers` is True, then conversion
+        (to string from integer/float zero) is not performed
+    """
     cdef:
         bint do_convert = 1
         float64_t float_item
@@ -2331,7 +2342,7 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
             if float_item == 0.0 or float_item != float_item:
                 do_convert = 0
 
-    if do_convert and not isinstance(item, (str, bytes)):
+    if do_convert and not isinstance(item, str):
         item = PyObject_Str(item)
 
     result[index] = item
@@ -2483,7 +2494,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
     """
     cdef:
         Py_ssize_t rows_count = 0, col_count = len(date_cols)
-        cnp.ndarray[object] result
 
     if col_count == 0:
         return np.zeros(0, dtype=object)
@@ -2492,9 +2502,8 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
 
     if all(util.is_array(array) for array in date_cols):
         # call specialized function to increase performance
-        result = _concat_date_cols_numpy(date_cols, rows_count, col_count,
-                                         keep_trivial_numbers)
+        return _concat_date_cols_numpy(date_cols, rows_count, col_count,
+                                       keep_trivial_numbers)
     else:
-        result = _concat_date_cols_sequence(date_cols, rows_count, col_count,
-                                            keep_trivial_numbers)
-    return result
+        return _concat_date_cols_sequence(date_cols, rows_count, col_count,
+                                          keep_trivial_numbers)

From 8797e5323368f5cca4ef06e4fec79488255100ed Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 1 Apr 2019 15:15:10 +0300
Subject: [PATCH 29/42] fix docstrings

---
 pandas/_libs/lib.pyx | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index cf430303ecd3b..d8bfaacae1cbe 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2325,9 +2325,10 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
     ----------
     item : object
     index : Py_ssize_t
-    keep_trivial_numbers : bool, default False
-        If `keep_trivial_numbers` is True, then conversion
-        (to string from integer/float zero) is not performed
+    result : memoryview of 1-d ndarray
+    keep_trivial_numbers : bool
+        if True, then conversion (to string from integer/float zero)
+        is not performed
     """
     cdef:
         bint do_convert = 1
@@ -2378,12 +2379,12 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
     rows_count : Py_ssize_t
     col_count : Py_ssize_t
     keep_trivial_numbers : bool, default False
-        If True, then for the case of one sequence in `date_cols`,
+        if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
 
     Returns
     -------
-    arr_of_rows : 1-d numpy array
+    arr_of_rows : ndarray (dtype=object)
     """
     cdef:
         Py_ssize_t col_idx, row_idx
@@ -2439,12 +2440,12 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
     rows_count : Py_ssize_t
     col_count : Py_ssize_t
     keep_trivial_numbers : bool, default False
-        If True, then for the case of one sequence in `date_cols`,
+        if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
 
     Returns
     -------
-    arr_of_rows : 1-d numpy array
+    arr_of_rows : ndarray (dtype=object)
     """
     cdef:
         Py_ssize_t col_idx, row_idx
@@ -2477,12 +2478,12 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
     ----------
     date_cols : tuple of sequences
     keep_trivial_numbers : bool, default False
-        If True, then for the case of one sequence in `date_cols`,
+        if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
 
     Returns
     -------
-    arr_of_rows : 1-d numpy array
+    arr_of_rows : ndarray (dtype=object)
 
     Examples
     --------

From 3bdb452999b8cf29f09ab78a89daccda4c11b2cb Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 5 Apr 2019 22:09:48 +0300
Subject: [PATCH 30/42] currently only one conversion function is used -
 convert_to_unicode

---
 pandas/_libs/lib.pyx | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index d8bfaacae1cbe..68304e6213f79 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2315,20 +2315,21 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef inline void convert_and_set_item(object item, Py_ssize_t index,
-                                      object[:] result,
+cdef inline object convert_to_unicode(object item,
                                       bint keep_trivial_numbers):
     """
-    Convert `item` to str and set into result[index].
+    Convert `item` to str.
 
     Parameters
     ----------
     item : object
-    index : Py_ssize_t
-    result : memoryview of 1-d ndarray
     keep_trivial_numbers : bool
         if True, then conversion (to string from integer/float zero)
         is not performed
+
+    Returns
+    -------
+    str
     """
     cdef:
         bint do_convert = 1
@@ -2346,16 +2347,7 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
     if do_convert and not isinstance(item, str):
         item = PyObject_Str(item)
 
-    result[index] = item
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
-                                       object item):
-    if not isinstance(item, str):
-        item = PyObject_Str(item)
-    lst[idx] = item
+    return item
 
 
 @cython.wraparound(False)
@@ -2403,8 +2395,8 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
         it = <flatiter>PyArray_IterNew(array)
         for row_idx in range(rows_count):
             item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-            convert_and_set_item(item, row_idx, result_view,
-                                 keep_trivial_numbers)
+            result_view[row_idx] = convert_to_unicode(item,
+                                                      keep_trivial_numbers)
             PyArray_ITER_NEXT(it)
     else:
         list_to_join = [None] * col_count
@@ -2417,7 +2409,7 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
             for col_idx, array in enumerate(date_cols):
                 it = <flatiter>iters_view[col_idx]
                 item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-                put_object_as_unicode(list_to_join, col_idx, item)
+                list_to_join[col_idx] = convert_to_unicode(item, False)
                 PyArray_ITER_NEXT(it)
             result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
@@ -2458,13 +2450,14 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
 
     if col_count == 1:
         for row_idx, item in enumerate(date_cols[0]):
-            convert_and_set_item(item, row_idx, result_view,
-                                 keep_trivial_numbers)
+            result_view[row_idx] = convert_to_unicode(item,
+                                                      keep_trivial_numbers)
     else:
         list_to_join = [None] * col_count
         for row_idx in range(rows_count):
             for col_idx, array in enumerate(date_cols):
-                put_object_as_unicode(list_to_join, col_idx, array[row_idx])
+                list_to_join[col_idx] = convert_to_unicode(array[row_idx],
+                                                           False)
             result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
     return result

From dcbcd9a4dbce3d0ce1e442dc32664f2ecb6500e2 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 5 Apr 2019 22:41:00 +0300
Subject: [PATCH 31/42] added some comments in _concat_date_cols_numpy

---
 pandas/_libs/lib.pyx | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 68304e6213f79..68dec7e85fb9b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2399,14 +2399,19 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
                                                       keep_trivial_numbers)
             PyArray_ITER_NEXT(it)
     else:
+        # create fixed size list - more effecient memory allocation
         list_to_join = [None] * col_count
-        # setup iterators
         iters = np.zeros(col_count, dtype=object)
+        # create memoryview of iters ndarray, that will contain some
+        # flatiter's for each array in `date_cols` - more effecient indexing
         iters_view = iters
         for col_idx, array in enumerate(date_cols):
             iters_view[col_idx] = PyArray_IterNew(array)
+        # array elements that are on the same line are converted to one string
         for row_idx in range(rows_count):
             for col_idx, array in enumerate(date_cols):
+                # this cast is needed, because we did not find a way
+                # to efficiently store `flatiter` type objects in ndarray
                 it = <flatiter>iters_view[col_idx]
                 item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
                 list_to_join[col_idx] = convert_to_unicode(item, False)
@@ -2446,6 +2451,7 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
         object[:] result_view
 
     result = np.zeros(rows_count, dtype=object)
+    # create memoryview of result ndarray - more effecient indexing
     result_view = result
 
     if col_count == 1:

From 1d9c7b768cb84b25a8a78e64a519522b83591b3a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 5 Apr 2019 22:58:12 +0300
Subject: [PATCH 32/42] fix problem from rebase

---
 pandas/_libs/tslibs/parsing.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index fe1121f0efa66..4d17f8d2c6273 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -33,7 +33,7 @@ cdef extern from "../src/headers/portable.h":
 
 cdef extern from "../src/parser/tokenizer.h":
     double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
-                   int skip_trailing, int *error)
+                   int skip_trailing, int *error, int *maybe_int)
 
 
 # ----------------------------------------------------------------------
@@ -329,7 +329,7 @@ cpdef bint _does_string_look_like_datetime(object date_string):
             return False
         else:
             converted_date = xstrtod(buf, &endptr,
-                                     b'.', b'e', b'\0', 1, &error)
+                                     b'.', b'e', b'\0', 1, &error, NULL)
             if error == 0 and endptr == buf + length:
                 return converted_date >= 1000
 

From b4fc8876e62234aaf75bbdc90ca54b86da46ae67 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 5 Apr 2019 23:33:29 +0300
Subject: [PATCH 33/42] added some comments in _does_string_look_like_datetime

---
 pandas/_libs/tslibs/parsing.pyx | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 4d17f8d2c6273..ce1123670022b 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -328,8 +328,15 @@ cpdef bint _does_string_look_like_datetime(object date_string):
         elif date_string in _not_datelike_strings:
             return False
         else:
+            # xstrtod with such paramaters copies behavior of python `float`
+            # cast; for example, " 35.e-1 " is valid string for this cast so,
+            # for correctly xstrtod call necessary to pass these params:
+            # b'.' - a dot is used as separator, b'e' - an exponential form of
+            # a float number can be used, b'\0' - not to use a thousand
+            # separator, 1 - skip extra spaces before and after,
             converted_date = xstrtod(buf, &endptr,
                                      b'.', b'e', b'\0', 1, &error, NULL)
+            # if there were no errors and the whole line was parsed, then ...
             if error == 0 and endptr == buf + length:
                 return converted_date >= 1000
 

From 25ee2d27e7d2df1f27baee963ded627a22cf5ce5 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 5 Apr 2019 23:47:23 +0300
Subject: [PATCH 34/42] changed default value of keep_trivial_numbers to true

---
 pandas/_libs/lib.pyx | 4 ++--
 pandas/io/parsers.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 68dec7e85fb9b..675b059627036 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2469,14 +2469,14 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
     return result
 
 
-def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=False):
+def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
     """
     Concatenates elements from sequences in `date_cols` into strings.
 
     Parameters
     ----------
     date_cols : tuple of sequences
-    keep_trivial_numbers : bool, default False
+    keep_trivial_numbers : bool, default True
         if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 2f8aa29162a24..939bb6ad287e2 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -3187,7 +3187,7 @@ def _make_date_converter(date_parser=None, dayfirst=False,
                          infer_datetime_format=False, cache_dates=True):
     def converter(*date_cols):
         if date_parser is None:
-            strs = _concat_date_cols(date_cols, keep_trivial_numbers=True)
+            strs = _concat_date_cols(date_cols)
 
             try:
                 return tools.to_datetime(

From 2046dcb0f99bbd76975fa9a1bcd119495243016e Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Mon, 29 Apr 2019 07:30:05 -0500
Subject: [PATCH 35/42] Remove unneeded try..except in
 _does_string_look_like_datetime benchmark

---
 asv_bench/benchmarks/io/parsers.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index f453705c5f859..46abb00a727da 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -15,10 +15,7 @@ def setup(self, value):
 
     def time_check_datetimes(self, value):
         for obj in self.objects:
-            try:
-                _does_string_look_like_datetime(obj)
-            except ValueError:
-                pass
+            _does_string_look_like_datetime(obj)
 
 
 class ConcatDateCols(object):

From 28b66704992e0d4c6df5290792c0282f8232478e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 29 Apr 2019 16:10:06 +0300
Subject: [PATCH 36/42] upgraded docstring; added some blank lines

---
 pandas/_libs/lib.pyx | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 675b059627036..586358750c3c5 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2357,19 +2357,15 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
                                                  Py_ssize_t col_count,
                                                  bint keep_trivial_numbers):
     """
-    Concatenates `rows_count` elements from each `col_count` numpy arrays
-    in `date_cols` into strings.
-
-    Note
-    ----
-    This function speeds up concatenation for numpy arrays.
-    You also can use `_concat_date_cols_sequence` function.
+    Concatenates elements from numpy arrays into strings.
 
     Parameters
     ----------
     date_cols : tuple of numpy arrays
     rows_count : Py_ssize_t
+        count of elements from arrays that will be concatenated
     col_count : Py_ssize_t
+        count of arrays whose elements will be concatenated
     keep_trivial_numbers : bool, default False
         if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
@@ -2377,6 +2373,11 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
     Returns
     -------
     arr_of_rows : ndarray (dtype=object)
+
+    Notes
+    -----
+    This function speeds up concatenation for numpy arrays.
+    You also can use `_concat_date_cols_sequence` function.
     """
     cdef:
         Py_ssize_t col_idx, row_idx
@@ -2402,11 +2403,13 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
         # create fixed size list - more effecient memory allocation
         list_to_join = [None] * col_count
         iters = np.zeros(col_count, dtype=object)
+
         # create memoryview of iters ndarray, that will contain some
         # flatiter's for each array in `date_cols` - more effecient indexing
         iters_view = iters
         for col_idx, array in enumerate(date_cols):
             iters_view[col_idx] = PyArray_IterNew(array)
+
         # array elements that are on the same line are converted to one string
         for row_idx in range(rows_count):
             for col_idx, array in enumerate(date_cols):
@@ -2428,14 +2431,15 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
                                                     Py_ssize_t col_count,
                                                     bint keep_trivial_numbers):
     """
-    Concatenates `rows_count` elements from each `col_count` sequences
-    in `date_cols` into strings.
+    Concatenates elements from sequences into strings.
 
     Parameters
     ----------
     date_cols : tuple of sequences
     rows_count : Py_ssize_t
+        count of elements from sequences that will be concatenated
     col_count : Py_ssize_t
+        count of sequences whose elements will be concatenated
     keep_trivial_numbers : bool, default False
         if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
@@ -2451,6 +2455,7 @@ cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
         object[:] result_view
 
     result = np.zeros(rows_count, dtype=object)
+
     # create memoryview of result ndarray - more effecient indexing
     result_view = result
 

From 30f70ab6a02e367f5cbbb28460c6ad5a5eddb570 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 6 May 2019 12:43:43 +0300
Subject: [PATCH 37/42] removed '_concat_date_cols_sequence' func

---
 asv_bench/benchmarks/io/parsers.py | 16 ++++-----
 pandas/_libs/lib.pyx               | 57 ++----------------------------
 2 files changed, 10 insertions(+), 63 deletions(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index 46abb00a727da..8a54b8c044ec6 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -20,16 +20,16 @@ def time_check_datetimes(self, value):
 
 class ConcatDateCols(object):
 
-    params = ([1234567890, 'AAAA'], [1, 2], [np.array, list])
-    param_names = ['value', 'dim', 'container']
+    params = ([1234567890, 'AAAA'], [1, 2])
+    param_names = ['value', 'dim']
 
-    def setup(self, value, dim, container):
-        count_elem = 10000
+    def setup(self, value, dim):
+        count_elem = 100000
         if dim == 1:
-            self.object = (container([value] * count_elem),)
+            self.object = (np.array([value] * count_elem),)
         if dim == 2:
-            self.object = (container([value] * count_elem),
-                           container([value] * count_elem))
+            self.object = (np.array([value] * count_elem),
+                           np.array([value] * count_elem))
 
-    def time_check_concat(self, value, dim, container):
+    def time_check_concat(self, value, dim):
         _concat_date_cols(self.object)
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 586358750c3c5..900230a5ebf89 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2424,59 +2424,9 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
     return result
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-cdef cnp.ndarray[object] _concat_date_cols_sequence(tuple date_cols,
-                                                    Py_ssize_t rows_count,
-                                                    Py_ssize_t col_count,
-                                                    bint keep_trivial_numbers):
-    """
-    Concatenates elements from sequences into strings.
-
-    Parameters
-    ----------
-    date_cols : tuple of sequences
-    rows_count : Py_ssize_t
-        count of elements from sequences that will be concatenated
-    col_count : Py_ssize_t
-        count of sequences whose elements will be concatenated
-    keep_trivial_numbers : bool, default False
-        if True and len(date_cols) == 1, then
-        conversion (to string from integer/float zero) is not performed
-
-    Returns
-    -------
-    arr_of_rows : ndarray (dtype=object)
-    """
-    cdef:
-        Py_ssize_t col_idx, row_idx
-        list list_to_join
-        cnp.ndarray[object] result
-        object[:] result_view
-
-    result = np.zeros(rows_count, dtype=object)
-
-    # create memoryview of result ndarray - more effecient indexing
-    result_view = result
-
-    if col_count == 1:
-        for row_idx, item in enumerate(date_cols[0]):
-            result_view[row_idx] = convert_to_unicode(item,
-                                                      keep_trivial_numbers)
-    else:
-        list_to_join = [None] * col_count
-        for row_idx in range(rows_count):
-            for col_idx, array in enumerate(date_cols):
-                list_to_join[col_idx] = convert_to_unicode(array[row_idx],
-                                                           False)
-            result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
-
-    return result
-
-
 def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
     """
-    Concatenates elements from sequences in `date_cols` into strings.
+    Concatenates elements from numpy arrays in `date_cols` into strings.
 
     Parameters
     ----------
@@ -2506,9 +2456,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
     rows_count = min(len(array) for array in date_cols)
 
     if all(util.is_array(array) for array in date_cols):
-        # call specialized function to increase performance
         return _concat_date_cols_numpy(date_cols, rows_count, col_count,
                                        keep_trivial_numbers)
-    else:
-        return _concat_date_cols_sequence(date_cols, rows_count, col_count,
-                                          keep_trivial_numbers)
+    raise ValueError("not all elements from date_cols are numpy arrays")

From 3800c406f0292eb4580a17dfd4b5bbc0692e9d1f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 6 May 2019 13:47:37 +0300
Subject: [PATCH 38/42] now only one function '_concat_date_cols'

---
 asv_bench/benchmarks/io/parsers.py |  2 +-
 pandas/_libs/lib.pyx               | 70 ++++++++----------------------
 2 files changed, 20 insertions(+), 52 deletions(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index 8a54b8c044ec6..6ee935e5ea51d 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -24,7 +24,7 @@ class ConcatDateCols(object):
     param_names = ['value', 'dim']
 
     def setup(self, value, dim):
-        count_elem = 100000
+        count_elem = 10000
         if dim == 1:
             self.object = (np.array([value] * count_elem),)
         if dim == 2:
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 900230a5ebf89..d61df351de9ea 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2352,21 +2352,14 @@ cdef inline object convert_to_unicode(object item,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
-                                                 Py_ssize_t rows_count,
-                                                 Py_ssize_t col_count,
-                                                 bint keep_trivial_numbers):
+def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
     """
-    Concatenates elements from numpy arrays into strings.
+    Concatenates elements from numpy arrays in `date_cols` into strings.
 
     Parameters
     ----------
     date_cols : tuple of numpy arrays
-    rows_count : Py_ssize_t
-        count of elements from arrays that will be concatenated
-    col_count : Py_ssize_t
-        count of arrays whose elements will be concatenated
-    keep_trivial_numbers : bool, default False
+    keep_trivial_numbers : bool, default True
         if True and len(date_cols) == 1, then
         conversion (to string from integer/float zero) is not performed
 
@@ -2374,12 +2367,16 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
     -------
     arr_of_rows : ndarray (dtype=object)
 
-    Notes
-    -----
-    This function speeds up concatenation for numpy arrays.
-    You also can use `_concat_date_cols_sequence` function.
+    Examples
+    --------
+    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
+    >>> times=np.array(['11:20', '10:45'], dtype=object)
+    >>> result = _concat_date_cols((dates, times))
+    >>> result
+    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
     """
     cdef:
+        Py_ssize_t rows_count = 0, col_count = len(date_cols)
         Py_ssize_t col_idx, row_idx
         list list_to_join
         cnp.ndarray[object] iters
@@ -2388,6 +2385,14 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
         cnp.ndarray[object] result
         object[:] result_view
 
+    if col_count == 0:
+        return np.zeros(0, dtype=object)
+
+
+    if not all(util.is_array(array) for array in date_cols):
+        raise ValueError("not all elements from date_cols are numpy arrays")
+
+    rows_count = min(len(array) for array in date_cols)
     result = np.zeros(rows_count, dtype=object)
     result_view = result
 
@@ -2422,40 +2427,3 @@ cdef cnp.ndarray[object] _concat_date_cols_numpy(tuple date_cols,
             result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
 
     return result
-
-
-def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
-    """
-    Concatenates elements from numpy arrays in `date_cols` into strings.
-
-    Parameters
-    ----------
-    date_cols : tuple of sequences
-    keep_trivial_numbers : bool, default True
-        if True and len(date_cols) == 1, then
-        conversion (to string from integer/float zero) is not performed
-
-    Returns
-    -------
-    arr_of_rows : ndarray (dtype=object)
-
-    Examples
-    --------
-    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
-    >>> times=np.array(['11:20', '10:45'], dtype=object)
-    >>> result = _concat_date_cols((dates, times))
-    >>> result
-    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
-    """
-    cdef:
-        Py_ssize_t rows_count = 0, col_count = len(date_cols)
-
-    if col_count == 0:
-        return np.zeros(0, dtype=object)
-
-    rows_count = min(len(array) for array in date_cols)
-
-    if all(util.is_array(array) for array in date_cols):
-        return _concat_date_cols_numpy(date_cols, rows_count, col_count,
-                                       keep_trivial_numbers)
-    raise ValueError("not all elements from date_cols are numpy arrays")

From b45df3f564c24d0a60fa9eb759d77a237b700161 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 7 May 2019 12:19:21 +0300
Subject: [PATCH 39/42] removed 'do_convert' local var from
 'convert_to_unicode'

---
 pandas/_libs/lib.pyx | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index d61df351de9ea..526bf00f21c0c 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2329,22 +2329,21 @@ cdef inline object convert_to_unicode(object item,
 
     Returns
     -------
-    str
+    str or int or float
     """
     cdef:
-        bint do_convert = 1
         float64_t float_item
 
     if keep_trivial_numbers:
         if isinstance(item, int):
             if <int>item == 0:
-                do_convert = 0
+                return item
         elif isinstance(item, float):
             float_item = item
             if float_item == 0.0 or float_item != float_item:
-                do_convert = 0
+                return item
 
-    if do_convert and not isinstance(item, str):
+    if not isinstance(item, str):
         item = PyObject_Str(item)
 
     return item
@@ -2388,7 +2387,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
     if col_count == 0:
         return np.zeros(0, dtype=object)
 
-
     if not all(util.is_array(array) for array in date_cols):
         raise ValueError("not all elements from date_cols are numpy arrays")
 

From 43dffec444a0a5451fe402822cda70fa27d53ba9 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 7 May 2019 12:54:50 +0300
Subject: [PATCH 40/42] moved '_concat_date_cols' and 'convert_to_unicode'
 from lib.pyx to parsing.pyx

---
 asv_bench/benchmarks/io/parsers.py         |   5 +-
 pandas/_libs/lib.pyx                       | 114 -------------------
 pandas/_libs/tslibs/parsing.pyx            | 124 ++++++++++++++++++++-
 pandas/io/parsers.py                       |  11 +-
 pandas/tests/io/parser/test_parse_dates.py |   2 +-
 5 files changed, 131 insertions(+), 125 deletions(-)

diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py
index 6ee935e5ea51d..493955d394443 100644
--- a/asv_bench/benchmarks/io/parsers.py
+++ b/asv_bench/benchmarks/io/parsers.py
@@ -1,8 +1,7 @@
 import numpy as np
 
-from pandas._libs.tslibs.parsing import _does_string_look_like_datetime
-
-from pandas.io.parsers import _concat_date_cols
+from pandas._libs.tslibs.parsing import (
+    _concat_date_cols, _does_string_look_like_datetime)
 
 
 class DoesStringLookLikeDatetime(object):
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 526bf00f21c0c..c09fb96eb9182 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2311,117 +2311,3 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
             output[i] = default
 
     return maybe_convert_objects(output)
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-cdef inline object convert_to_unicode(object item,
-                                      bint keep_trivial_numbers):
-    """
-    Convert `item` to str.
-
-    Parameters
-    ----------
-    item : object
-    keep_trivial_numbers : bool
-        if True, then conversion (to string from integer/float zero)
-        is not performed
-
-    Returns
-    -------
-    str or int or float
-    """
-    cdef:
-        float64_t float_item
-
-    if keep_trivial_numbers:
-        if isinstance(item, int):
-            if <int>item == 0:
-                return item
-        elif isinstance(item, float):
-            float_item = item
-            if float_item == 0.0 or float_item != float_item:
-                return item
-
-    if not isinstance(item, str):
-        item = PyObject_Str(item)
-
-    return item
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
-    """
-    Concatenates elements from numpy arrays in `date_cols` into strings.
-
-    Parameters
-    ----------
-    date_cols : tuple of numpy arrays
-    keep_trivial_numbers : bool, default True
-        if True and len(date_cols) == 1, then
-        conversion (to string from integer/float zero) is not performed
-
-    Returns
-    -------
-    arr_of_rows : ndarray (dtype=object)
-
-    Examples
-    --------
-    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
-    >>> times=np.array(['11:20', '10:45'], dtype=object)
-    >>> result = _concat_date_cols((dates, times))
-    >>> result
-    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
-    """
-    cdef:
-        Py_ssize_t rows_count = 0, col_count = len(date_cols)
-        Py_ssize_t col_idx, row_idx
-        list list_to_join
-        cnp.ndarray[object] iters
-        object[::1] iters_view
-        flatiter it
-        cnp.ndarray[object] result
-        object[:] result_view
-
-    if col_count == 0:
-        return np.zeros(0, dtype=object)
-
-    if not all(util.is_array(array) for array in date_cols):
-        raise ValueError("not all elements from date_cols are numpy arrays")
-
-    rows_count = min(len(array) for array in date_cols)
-    result = np.zeros(rows_count, dtype=object)
-    result_view = result
-
-    if col_count == 1:
-        array = date_cols[0]
-        it = <flatiter>PyArray_IterNew(array)
-        for row_idx in range(rows_count):
-            item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-            result_view[row_idx] = convert_to_unicode(item,
-                                                      keep_trivial_numbers)
-            PyArray_ITER_NEXT(it)
-    else:
-        # create fixed size list - more effecient memory allocation
-        list_to_join = [None] * col_count
-        iters = np.zeros(col_count, dtype=object)
-
-        # create memoryview of iters ndarray, that will contain some
-        # flatiter's for each array in `date_cols` - more effecient indexing
-        iters_view = iters
-        for col_idx, array in enumerate(date_cols):
-            iters_view[col_idx] = PyArray_IterNew(array)
-
-        # array elements that are on the same line are converted to one string
-        for row_idx in range(rows_count):
-            for col_idx, array in enumerate(date_cols):
-                # this cast is needed, because we did not find a way
-                # to efficiently store `flatiter` type objects in ndarray
-                it = <flatiter>iters_view[col_idx]
-                item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-                list_to_join[col_idx] = convert_to_unicode(item, False)
-                PyArray_ITER_NEXT(it)
-            result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
-
-    return result
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index ce1123670022b..5f8a817db053e 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -7,11 +7,19 @@ from io import StringIO
 
 from libc.string cimport strchr
 
+import cython
+
+from cpython cimport PyObject_Str, PyUnicode_Join
+
 from cpython.datetime cimport datetime, datetime_new, import_datetime
 from cpython.version cimport PY_VERSION_HEX
 import_datetime()
 
 import numpy as np
+cimport numpy as cnp
+from numpy cimport (PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT,
+                    PyArray_IterNew, flatiter, float64_t)
+cnp.import_array()
 
 # dateutil compat
 from dateutil.tz import (tzoffset,
@@ -26,7 +34,7 @@ from pandas._config import get_option
 
 from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
 from pandas._libs.tslibs.nattype import nat_strings, NaT
-from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
+from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size
 
 cdef extern from "../src/headers/portable.h":
     int getdigit_ascii(char c, int default) nogil
@@ -880,3 +888,117 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
         return guessed_format
     else:
         return None
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cdef inline object convert_to_unicode(object item,
+                                      bint keep_trivial_numbers):
+    """
+    Convert `item` to str.
+
+    Parameters
+    ----------
+    item : object
+    keep_trivial_numbers : bool
+        if True, then conversion (to string from integer/float zero)
+        is not performed
+
+    Returns
+    -------
+    str or int or float
+    """
+    cdef:
+        float64_t float_item
+
+    if keep_trivial_numbers:
+        if isinstance(item, int):
+            if <int>item == 0:
+                return item
+        elif isinstance(item, float):
+            float_item = item
+            if float_item == 0.0 or float_item != float_item:
+                return item
+
+    if not isinstance(item, str):
+        item = PyObject_Str(item)
+
+    return item
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
+    """
+    Concatenates elements from numpy arrays in `date_cols` into strings.
+
+    Parameters
+    ----------
+    date_cols : tuple of numpy arrays
+    keep_trivial_numbers : bool, default True
+        if True and len(date_cols) == 1, then
+        conversion (to string from integer/float zero) is not performed
+
+    Returns
+    -------
+    arr_of_rows : ndarray (dtype=object)
+
+    Examples
+    --------
+    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
+    >>> times=np.array(['11:20', '10:45'], dtype=object)
+    >>> result = _concat_date_cols((dates, times))
+    >>> result
+    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
+    """
+    cdef:
+        Py_ssize_t rows_count = 0, col_count = len(date_cols)
+        Py_ssize_t col_idx, row_idx
+        list list_to_join
+        cnp.ndarray[object] iters
+        object[::1] iters_view
+        flatiter it
+        cnp.ndarray[object] result
+        object[:] result_view
+
+    if col_count == 0:
+        return np.zeros(0, dtype=object)
+
+    if not all(is_array(array) for array in date_cols):
+        raise ValueError("not all elements from date_cols are numpy arrays")
+
+    rows_count = min(len(array) for array in date_cols)
+    result = np.zeros(rows_count, dtype=object)
+    result_view = result
+
+    if col_count == 1:
+        array = date_cols[0]
+        it = <flatiter>PyArray_IterNew(array)
+        for row_idx in range(rows_count):
+            item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
+            result_view[row_idx] = convert_to_unicode(item,
+                                                      keep_trivial_numbers)
+            PyArray_ITER_NEXT(it)
+    else:
+        # create fixed size list - more effecient memory allocation
+        list_to_join = [None] * col_count
+        iters = np.zeros(col_count, dtype=object)
+
+        # create memoryview of iters ndarray, that will contain some
+        # flatiter's for each array in `date_cols` - more effecient indexing
+        iters_view = iters
+        for col_idx, array in enumerate(date_cols):
+            iters_view[col_idx] = PyArray_IterNew(array)
+
+        # array elements that are on the same line are converted to one string
+        for row_idx in range(rows_count):
+            for col_idx, array in enumerate(date_cols):
+                # this cast is needed, because we did not find a way
+                # to efficiently store `flatiter` type objects in ndarray
+                it = <flatiter>iters_view[col_idx]
+                item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
+                list_to_join[col_idx] = convert_to_unicode(item, False)
+                PyArray_ITER_NEXT(it)
+            result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
+
+    return result
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 939bb6ad287e2..f25142fcfcf58 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -14,7 +14,6 @@
 import numpy as np
 
 import pandas._libs.lib as lib
-from pandas._libs.lib import _concat_date_cols
 import pandas._libs.ops as libops
 import pandas._libs.parsers as parsers
 from pandas._libs.tslibs import parsing
@@ -3187,7 +3186,7 @@ def _make_date_converter(date_parser=None, dayfirst=False,
                          infer_datetime_format=False, cache_dates=True):
     def converter(*date_cols):
         if date_parser is None:
-            strs = _concat_date_cols(date_cols)
+            strs = parsing._concat_date_cols(date_cols)
 
             try:
                 return tools.to_datetime(
@@ -3217,10 +3216,10 @@ def converter(*date_cols):
             except Exception:
                 try:
                     return tools.to_datetime(
-                        parsing.try_parse_dates(_concat_date_cols(date_cols),
-                                                parser=date_parser,
-                                                dayfirst=dayfirst),
-                        cache=cache_dates,
+                        parsing.try_parse_dates(
+                            parsing._concat_date_cols(date_cols),
+                            parser=date_parser,
+                            dayfirst=dayfirst),
                         errors='ignore')
                 except Exception:
                     return generic_parser(date_parser, *date_cols)
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 6c4dfe2ffa1fa..709ca3e686229 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -76,7 +76,7 @@ def date_parser(*date_cols):
         -------
         parsed : Series
         """
-        return parsing.try_parse_dates(parsers._concat_date_cols(date_cols))
+        return parsing.try_parse_dates(parsing._concat_date_cols(date_cols))
 
     result = parser.read_csv(StringIO(data), header=None,
                              date_parser=date_parser, prefix="X",

From c06a662fe681c9d7c68d1a9289e93e466fc94311 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 7 May 2019 14:43:31 +0300
Subject: [PATCH 41/42] added 'test_concat_date_col_fail' test

---
 pandas/_libs/tslibs/parsing.pyx            |  2 --
 pandas/tests/io/parser/test_parse_dates.py | 15 +++++++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 5f8a817db053e..18d55999aaa38 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -58,8 +58,6 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
 cdef:
     set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
 
-    set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
-
 # ----------------------------------------------------------------------
 cdef:
     const char* delimiters = " /-."
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 709ca3e686229..46353b5345018 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -19,12 +19,11 @@
 from pandas.compat.numpy import np_array_datetime64_compat
 
 import pandas as pd
-from pandas import DataFrame, DatetimeIndex, Index, MultiIndex
+from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series
 from pandas.core.indexes.datetimes import date_range
 import pandas.util.testing as tm
 
 import pandas.io.date_converters as conv
-import pandas.io.parsers as parsers
 
 # constant
 _DEFAULT_DATETIME = datetime(1, 1, 1)
@@ -117,6 +116,18 @@ def date_parser(*date_cols):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("container", [list, tuple, Index, Series])
+@pytest.mark.parametrize("dim", [1, 2])
+def test_concat_date_col_fail(container, dim):
+    msg = "not all elements from date_cols are numpy arrays"
+    value = "19990127"
+    # wrap the value in a non-ndarray container, once per requested column
+    date_cols = tuple(container([value]) for _ in range(dim))
+    # every column must be a numpy array, so a ValueError is expected
+    with pytest.raises(ValueError, match=msg):
+        parsing._concat_date_cols(date_cols)
+
+
 @pytest.mark.parametrize("keep_date_col", [True, False])
 def test_multiple_date_col(all_parsers, keep_date_col):
     data = """\

From 5dda33c5469bb9024db468638fb96101beafb5da Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 10 May 2019 00:11:33 +0300
Subject: [PATCH 42/42] added doc-string to '_does_string_look_like_datetime'
 func; changed 'date_string' -> 'py_string'

---
 pandas/_libs/tslibs/parsing.pyx | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 18d55999aaa38..068ad016459a8 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -315,7 +315,19 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
     return parsed, parsed, reso
 
 
-cpdef bint _does_string_look_like_datetime(object date_string):
+cpdef bint _does_string_look_like_datetime(object py_string):
+    """
+    Check whether the given string looks like a datetime: it has to start
+    with '0' or be greater than 1000 when read as a number.
+
+    Parameters
+    ----------
+    py_string : object
+
+    Returns
+    -------
+    whether the given string looks like a datetime
+    """
     cdef:
         const char *buf
         char *endptr = NULL
@@ -324,14 +336,14 @@ cpdef bint _does_string_look_like_datetime(object date_string):
         char first
         int error = 0
 
-    buf = get_c_string_buf_and_size(date_string, &length)
+    buf = get_c_string_buf_and_size(py_string, &length)
     if length >= 1:
         first = buf[0]
         if first == b'0':
             # Strings starting with 0 are more consistent with a
             # date-like string than a number
             return True
-        elif date_string in _not_datelike_strings:
+        elif py_string in _not_datelike_strings:
             return False
         else:
             # xstrtod with such paramaters copies behavior of python `float`