From 7761ecdbc0530ca45c3ae9e37ca40f4ed44bf537 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 24 Jan 2023 08:54:09 -0800 Subject: [PATCH 01/26] BUG: hashing datetime64 objects --- pandas/_libs/src/klib/khash_python.h | 41 ++++++++++++++++++++ pandas/tests/indexes/object/test_indexing.py | 26 +++++++++++++ setup.py | 2 + 3 files changed, 69 insertions(+) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 56afea049c1ec..bb8984ee8f631 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -7,7 +7,10 @@ typedef npy_complex64 khcomplex64_t; typedef npy_complex128 khcomplex128_t; +// get pandas_datetime_to_datetimestruct +#include <../../tslibs/src/datetime/np_datetime.h> +#include "datetime.h" // khash should report usage to tracemalloc #if PY_VERSION_HEX >= 0x03060000 @@ -330,6 +333,40 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { } +// TODO: same thing for timedelta64 objects +Py_hash_t PANDAS_INLINE np_datetime64_object_hash(PyObject* key) { + // GH#50690 numpy's hash implementation does not preserve comparabity + // either across resolutions or with standard library objects. + NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)((PyDatetimeScalarObject*)key)->obmeta.base; + npy_datetime value = ((PyDatetimeScalarObject*)key)->obval; + npy_datetimestruct dts; + PyObject* dt; + + pandas_datetime_to_datetimestruct(value, unit, &dts); + + if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { + // we CAN cast to pydatetime, so use that hash to ensure we compare + // as matching standard library datetimes (and pd.Timestamps) + PyDateTime_IMPORT; + dt = PyDateTime_FromDateAndTime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us + ); + return PyObject_Hash(dt); + } + + if (unit == NPY_FR_as) { + // nothing higher to cast to, so use value. Lower-resolution + // cases are responsible for matching this. + return value; + } + + // TODO: see if we can cast to the next-highest unit without overflow. + // If so, return the hash of _that_ reso. Otherwise, return value. + // See also Timestamp.__hash__ + return value; +} + + khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) { Py_hash_t hash; // For PyObject_Hash holds: @@ -351,6 +388,10 @@ khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) { else if (PyTuple_CheckExact(key)) { hash = tupleobject_hash((PyTupleObject*)key); } + else if (PyObject_TypeCheck(key, &PyDatetimeArrType_Type)) { + // GH#50690 + hash = np_datetime64_object_hash(key); + } else { hash = PyObject_Hash(key); } diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 87d3afc77d556..d48a59d418c30 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -187,3 +187,29 @@ def test_slice_locs_dup(self): assert index2.slice_locs(end="a") == (0, 6) assert index2.slice_locs("d", "b") == (0, 4) assert index2.slice_locs("c", "a") == (2, 6) + + +def test_np_datetime64_objects(): + # GH#50690 + ms = np.datetime64(1, "ms") + us = np.datetime64(1000, "us") + + left = Index([ms], dtype=object) + right = Index([us], dtype=object) + + assert left[0] in right + assert right[0] in left + + assert left.get_loc(right[0]) == 0 + assert right.get_loc(left[0]) == 0 + + # non-monotonic cases go through different paths in cython code + sec = np.datetime64("9999-01-01", "s") + day = np.datetime64("2016-01-01", "D") + left2 = Index([ms, sec, day], dtype=object) + + expected = np.array([0], dtype=np.intp) + res = left2[:1].get_indexer(right) + tm.assert_numpy_array_equal(res, expected) + res = left2.get_indexer(right) + tm.assert_numpy_array_equal(res, expected) diff --git a/setup.py b/setup.py index f8fa048757289..3f661c56ec382 100755 --- a/setup.py +++ b/setup.py @@ -453,7 +453,9 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "depends": ( ["pandas/_libs/src/klib/khash_python.h", "pandas/_libs/src/klib/khash.h"] + _pxi_dep["hashtable"] + + tseries_depends ), + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.index": { "pyxfile": "_libs/index", From 610b0c6dd4144b5a00645807d4de84c03bbe7b02 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 24 Jan 2023 12:27:32 -0800 Subject: [PATCH 02/26] handle cases out of pydatetime bounds --- pandas/_libs/src/klib/khash_python.h | 111 ++++++++++++++++++++++++--- setup.py | 3 +- 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index bb8984ee8f631..80850769c459e 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -333,15 +333,113 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { } +// TODO: if nanos is most common/important, might be most performant to +// make that canonical and others cast to that? +Py_hash_t PANDAS_INLINE hash_datetime_value_and_reso(npy_datetime value, NPY_DATETIMEUNIT unit, npy_datetimestruct* dts) { + // If we cannot cast to pydatetime, then the question is if there are + // other-resolution datetime64 objects that we might be equal to whose + // hashes we need to match. We let year-reso objects return value, and make + // higher-resolution cases responsible for checking of they match. + if (unit == NPY_FR_Y) { + return value; + } + else if (unit == NPY_FR_M) { + if ((value % 12) == 0) { + return hash_datetime_value_and_reso(value / 12, NPY_FR_Y, dts); + } + return value; + } + else if (unit == NPY_FR_W) { + if (dts->day == 1) { + value = (dts->year - 1970) * 12 + dts->month; + return hash_datetime_value_and_reso(value, NPY_FR_M, dts); + } + return value; + } + else if (unit == NPY_FR_D) { + if ((value % 7) == 0) { + return hash_datetime_value_and_reso(value / 7, NPY_FR_W, dts); + } + return value; + } + else if (unit == NPY_FR_h) { + if ((value % 24) == 0) { + return hash_datetime_value_and_reso(value / 24, NPY_FR_D, dts); + } + return value; + } + else if (unit == NPY_FR_m) { + if ((value % 60) == 0) { + return hash_datetime_value_and_reso(value / 60, NPY_FR_h, dts); + } + return value; + } + else if (unit == NPY_FR_s) { + if ((value % 60) == 0) { + return hash_datetime_value_and_reso(value / 60, NPY_FR_m, dts); + } + return value; + } + else if (unit == NPY_FR_ms) { + if ((value % 1000) == 0) { + return hash_datetime_value_and_reso(value / 1000, NPY_FR_s, dts); + } + return value; + } + else if (unit == NPY_FR_us) { + if ((value % 1000) == 0) { + return hash_datetime_value_and_reso(value / 1000, NPY_FR_ns, dts); + } + return value; + } + else if (unit == NPY_FR_ns) { + if ((value % 1000) == 0) { + return hash_datetime_value_and_reso(value / 1000, NPY_FR_us, dts); + } + return value; + } + else if (unit == NPY_FR_ps) { + if ((value % 1000) == 0) { + return hash_datetime_value_and_reso(value / 1000, NPY_FR_ns, dts); + } + return value; + } + else if (unit == NPY_FR_fs) { + if ((value % 1000) == 0) { + return hash_datetime_value_and_reso(value / 1000, NPY_FR_ps, dts); + } + return value; + } + else if (unit == NPY_FR_as) { + if ((value % 1000) == 0) { + return hash_datetime_value_and_reso(value / 1000, NPY_FR_fs, dts); + } + return value; + } + else { + // i.e. NPY_FR_GENERIC + // we default to treating these like nanos + return hash_datetime_value_and_reso(value, NPY_FR_ns, dts); + } +} + + // TODO: same thing for timedelta64 objects -Py_hash_t PANDAS_INLINE np_datetime64_object_hash(PyObject* key) { +Py_hash_t np_datetime64_object_hash(PyObject* key) { // GH#50690 numpy's hash implementation does not preserve comparabity // either across resolutions or with standard library objects. + // See also Timestamp.__hash__ + NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)((PyDatetimeScalarObject*)key)->obmeta.base; npy_datetime value = ((PyDatetimeScalarObject*)key)->obval; npy_datetimestruct dts; PyObject* dt; + if (value == NPY_DATETIME_NAT) { + // np.datetime64("NaT") in any reso + return NPY_DATETIME_NAT; + } + pandas_datetime_to_datetimestruct(value, unit, &dts); if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { @@ -354,16 +452,7 @@ Py_hash_t PANDAS_INLINE np_datetime64_object_hash(PyObject* key) { return PyObject_Hash(dt); } - if (unit == NPY_FR_as) { - // nothing higher to cast to, so use value. Lower-resolution - // cases are responsible for matching this. - return value; - } - - // TODO: see if we can cast to the next-highest unit without overflow. - // If so, return the hash of _that_ reso. Otherwise, return value. - // See also Timestamp.__hash__ - return value; + return hash_datetime_value_and_reso(value, unit, &dts); } diff --git a/setup.py b/setup.py index 3f661c56ec382..adfe03f1b22cc 100755 --- a/setup.py +++ b/setup.py @@ -467,7 +467,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.interval": { "pyxfile": "_libs/interval", "include": klib_include, - "depends": _pxi_dep["interval"], + "depends": _pxi_dep["interval"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { From 92a39ebd3d0affd1ca81ca57c199f56f0a2d646b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Jan 2023 07:47:02 -0800 Subject: [PATCH 03/26] troubleshoot CI builds --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index adfe03f1b22cc..40b7228cde17e 100755 --- a/setup.py +++ b/setup.py @@ -442,7 +442,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.algos": { "pyxfile": "_libs/algos", "include": klib_include, - "depends": _pxi_dep["algos"], + "depends": _pxi_dep["algos"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.arrays": {"pyxfile": "_libs/arrays"}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, From 2f67805329fd6dcc0c2c36964c1e931dbf4478bf Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Jan 2023 08:40:13 -0800 Subject: [PATCH 04/26] troubleshoot CI builds --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 40b7228cde17e..8a28bb710d908 100755 --- a/setup.py +++ b/setup.py @@ -461,7 +461,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.index": { "pyxfile": "_libs/index", "include": klib_include, - "depends": _pxi_dep["index"], + "depends": _pxi_dep["index"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, From 0635f86f7a4215d6f70cbe11af6f7e93cc508931 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Jan 2023 09:48:49 -0800 Subject: [PATCH 05/26] troubleshoot CI builds --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8a28bb710d908..5a2b2541ab03b 100755 --- a/setup.py +++ b/setup.py @@ -472,7 +472,11 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "depends": _pxi_dep["interval"] + tseries_depends, "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, - "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, + "_libs.join": { + "pyxfile": "_libs/join", + "depends": tseries_depends, + "include": klib_include, + }, "_libs.lib": { "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, From 229ab72d28f416af269d5731412eb3c6a2916469 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Jan 2023 10:46:11 -0800 Subject: [PATCH 06/26] troubleshoot CI builds --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5a2b2541ab03b..4e767493383dc 100755 --- a/setup.py +++ b/setup.py @@ -476,6 +476,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/join", "depends": tseries_depends, "include": klib_include, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.lib": { "pyxfile": "_libs/lib", From 6e96805978dbcba769eb074c25eff6a99a0f101a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Jan 2023 12:57:04 -0800 Subject: [PATCH 07/26] troubleshoot CI builds --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4e767493383dc..cdb747a9c1480 100755 --- a/setup.py +++ b/setup.py @@ -491,10 +491,12 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "depends": [ "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", - ], + ] + + tseries_depends, "sources": [ "pandas/_libs/src/parser/tokenizer.c", "pandas/_libs/src/parser/io.c", + "pandas/_libs/tslibs/src/datetime/np_datetime.c", ], }, "_libs.reduction": {"pyxfile": "_libs/reduction"}, From 058b666b84c8b8e0162b557fd5e993e0db9f967f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 27 Jan 2023 09:02:25 -0800 Subject: [PATCH 08/26] suggested edits --- pandas/_libs/src/klib/khash_python.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 80850769c459e..bd3011b9b1c96 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -341,6 +341,10 @@ Py_hash_t PANDAS_INLINE hash_datetime_value_and_reso(npy_datetime value, NPY_DAT // hashes we need to match. We let year-reso objects return value, and make // higher-resolution cases responsible for checking of they match. if (unit == NPY_FR_Y) { + if (value == -1) { + // https://github.com/pandas-dev/pandas/pull/50960#discussion_r1088695136 + return -2; + } return value; } else if (unit == NPY_FR_M) { @@ -433,7 +437,6 @@ Py_hash_t np_datetime64_object_hash(PyObject* key) { NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)((PyDatetimeScalarObject*)key)->obmeta.base; npy_datetime value = ((PyDatetimeScalarObject*)key)->obval; npy_datetimestruct dts; - PyObject* dt; if (value == NPY_DATETIME_NAT) { // np.datetime64("NaT") in any reso @@ -446,10 +449,19 @@ Py_hash_t np_datetime64_object_hash(PyObject* key) { // we CAN cast to pydatetime, so use that hash to ensure we compare // as matching standard library datetimes (and pd.Timestamps) PyDateTime_IMPORT; + + PyObject* dt; + Py_hash_t hash; + dt = PyDateTime_FromDateAndTime( dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us ); - return PyObject_Hash(dt); + if (dt == NULL) { + return -1; + } + hash = PyObject_Hash(dt); + Py_DECREF(dt); + return hash; } return hash_datetime_value_and_reso(value, unit, &dts); From 6e4836ed7c5c4cf0a22333000114731e3f9138f4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Feb 2023 13:44:36 -0800 Subject: [PATCH 09/26] use sebergs suggestion --- pandas/_libs/src/klib/khash_python.h | 102 +----------------- pandas/_libs/tslibs/np_datetime.pxd | 2 + .../_libs/tslibs/src/datetime/np_datetime.c | 52 +++++++++ .../_libs/tslibs/src/datetime/np_datetime.h | 3 + pandas/_libs/tslibs/timestamps.pyx | 11 +- 5 files changed, 67 insertions(+), 103 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index bd3011b9b1c96..413b38aa0f0d0 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -308,6 +308,7 @@ khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key); #define _PandasHASH_XXROTATE(x) ((x << 13) | (x >> 19)) /* Rotate left 13 bits */ #endif + Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { Py_ssize_t i, len = Py_SIZE(key); PyObject **item = key->ob_item; @@ -318,9 +319,7 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { if (lane == (Py_uhash_t)-1) { return -1; } - acc += lane * _PandasHASH_XXPRIME_2; - acc = _PandasHASH_XXROTATE(acc); - acc *= _PandasHASH_XXPRIME_1; + acc = tuple_update_uhash(acc, lane); } /* Add input length, mangled to keep the historical value of hash(()). */ @@ -333,101 +332,6 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { } -// TODO: if nanos is most common/important, might be most performant to -// make that canonical and others cast to that? -Py_hash_t PANDAS_INLINE hash_datetime_value_and_reso(npy_datetime value, NPY_DATETIMEUNIT unit, npy_datetimestruct* dts) { - // If we cannot cast to pydatetime, then the question is if there are - // other-resolution datetime64 objects that we might be equal to whose - // hashes we need to match. We let year-reso objects return value, and make - // higher-resolution cases responsible for checking of they match. - if (unit == NPY_FR_Y) { - if (value == -1) { - // https://github.com/pandas-dev/pandas/pull/50960#discussion_r1088695136 - return -2; - } - return value; - } - else if (unit == NPY_FR_M) { - if ((value % 12) == 0) { - return hash_datetime_value_and_reso(value / 12, NPY_FR_Y, dts); - } - return value; - } - else if (unit == NPY_FR_W) { - if (dts->day == 1) { - value = (dts->year - 1970) * 12 + dts->month; - return hash_datetime_value_and_reso(value, NPY_FR_M, dts); - } - return value; - } - else if (unit == NPY_FR_D) { - if ((value % 7) == 0) { - return hash_datetime_value_and_reso(value / 7, NPY_FR_W, dts); - } - return value; - } - else if (unit == NPY_FR_h) { - if ((value % 24) == 0) { - return hash_datetime_value_and_reso(value / 24, NPY_FR_D, dts); - } - return value; - } - else if (unit == NPY_FR_m) { - if ((value % 60) == 0) { - return hash_datetime_value_and_reso(value / 60, NPY_FR_h, dts); - } - return value; - } - else if (unit == NPY_FR_s) { - if ((value % 60) == 0) { - return hash_datetime_value_and_reso(value / 60, NPY_FR_m, dts); - } - return value; - } - else if (unit == NPY_FR_ms) { - if ((value % 1000) == 0) { - return hash_datetime_value_and_reso(value / 1000, NPY_FR_s, dts); - } - return value; - } - else if (unit == NPY_FR_us) { - if ((value % 1000) == 0) { - return hash_datetime_value_and_reso(value / 1000, NPY_FR_ns, dts); - } - return value; - } - else if (unit == NPY_FR_ns) { - if ((value % 1000) == 0) { - return hash_datetime_value_and_reso(value / 1000, NPY_FR_us, dts); - } - return value; - } - else if (unit == NPY_FR_ps) { - if ((value % 1000) == 0) { - return hash_datetime_value_and_reso(value / 1000, NPY_FR_ns, dts); - } - return value; - } - else if (unit == NPY_FR_fs) { - if ((value % 1000) == 0) { - return hash_datetime_value_and_reso(value / 1000, NPY_FR_ps, dts); - } - return value; - } - else if (unit == NPY_FR_as) { - if ((value % 1000) == 0) { - return hash_datetime_value_and_reso(value / 1000, NPY_FR_fs, dts); - } - return value; - } - else { - // i.e. NPY_FR_GENERIC - // we default to treating these like nanos - return hash_datetime_value_and_reso(value, NPY_FR_ns, dts); - } -} - - // TODO: same thing for timedelta64 objects Py_hash_t np_datetime64_object_hash(PyObject* key) { // GH#50690 numpy's hash implementation does not preserve comparabity @@ -464,7 +368,7 @@ Py_hash_t np_datetime64_object_hash(PyObject* key) { return hash; } - return hash_datetime_value_and_reso(value, unit, &dts); + return hash_datetime_from_struct(&dts); } diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index fa560cd0853f6..8817ac1904a7e 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -71,6 +71,8 @@ cdef extern from "src/datetime/np_datetime.h": pandas_timedeltastruct *result ) nogil + int64_t hash_datetime_from_struct(npy_datetimestruct* dts) + cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 2bac6c720c3b6..87a92edd8a26d 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -1091,3 +1091,55 @@ PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); } + + +// we could use any hashing algorithm, this is the original CPython's for tuples + +#if SIZEOF_PY_UHASH_T > 4 +#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL) +#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL) +#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL) +#define _PandasHASH_XXROTATE(x) ((x << 31) | (x >> 33)) /* Rotate left 31 bits */ +#else +#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL) +#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL) +#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL) +#define _PandasHASH_XXROTATE(x) ((x << 13) | (x >> 19)) /* Rotate left 13 bits */ +#endif + + +Py_uhash_t tuple_update_uhash(Py_uhash_t acc, Py_uhash_t lane) { + acc += lane * _PandasHASH_XXPRIME_2; + acc = _PandasHASH_XXROTATE(acc); + acc *= _PandasHASH_XXPRIME_1; + return acc; +} + +// https://github.com/pandas-dev/pandas/pull/50960 +Py_hash_t hash_datetime_from_struct(npy_datetimestruct* dts) { + /* + * If we cannot cast to datetime, use the datetime struct values directly + * and mix them similar to a tuple. + */ + + Py_uhash_t acc = _PandasHASH_XXPRIME_5; +#if 64 <= SIZEOF_PY_UHASH_T + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->year); +#else + /* Mix lower and uper bits of the year if int64 is larger */ + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->year); + acc = tuple_update_uhash(acc, (Py_uhash_t)(dts->year >> SIZEOF_PY_UHASH_T)); +#endif + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->month); + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->day); + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->min); + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->sec); + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->us); + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->ps); + acc = tuple_update_uhash(acc, (Py_uhash_t)dts->as); + /* should be a need to mix length, as it is fixed anyway? */ + if (acc == (Py_uhash_t)-1) { + acc = (Py_uhash_t)-2; + } + return acc; +} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 6ab915e517cfb..f606d6a4076d9 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -99,4 +99,7 @@ PyArray_DatetimeMetaData get_datetime_metadata_from_dtype( PyArray_Descr *dtype); +Py_hash_t hash_datetime_from_struct(npy_datetimestruct* dts); +Py_uhash_t tuple_update_uhash(Py_uhash_t acc, Py_uhash_t lane); + #endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index fb3adda155254..66b767c11817e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -8,6 +8,7 @@ shadows the python class, where we do any heavy lifting. """ import warnings + cimport cython import numpy as np @@ -86,6 +87,7 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_unit_from_dtype, + hash_datetime_from_struct, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, @@ -295,11 +297,12 @@ cdef class _Timestamp(ABCTimestamp): # ----------------------------------------------------------------- def __hash__(_Timestamp self): - if self.nanosecond: - return hash(self.value) - if not (1 <= self.year <= 9999): + cdef: + npy_datetimestruct dts + if not (1 <= self.year <= 9999) or self.nanosecond: # out of bounds for pydatetime - return hash(self.value) + pydatetime_to_dtstruct(self, &dts) + return hash_datetime_from_struct(&dts) if self.fold: return datetime.__hash__(self.replace(fold=0)) return datetime.__hash__(self) From 818682c6002d7b2525a90bf5ed0580bf2a0a4016 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Feb 2023 10:44:32 -0800 Subject: [PATCH 10/26] suggested edits --- pandas/_libs/src/klib/khash_python.h | 4 ++-- pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/src/datetime/np_datetime.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 413b38aa0f0d0..ba80b656ae4dc 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -333,7 +333,7 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { // TODO: same thing for timedelta64 objects -Py_hash_t np_datetime64_object_hash(PyObject* key) { +Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { // GH#50690 numpy's hash implementation does not preserve comparabity // either across resolutions or with standard library objects. // See also Timestamp.__hash__ @@ -395,7 +395,7 @@ khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) { } else if (PyObject_TypeCheck(key, &PyDatetimeArrType_Type)) { // GH#50690 - hash = np_datetime64_object_hash(key); + hash = np_datetime64_object_hash((PyDatetimeScalarObject *)key); } else { hash = PyObject_Hash(key); diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 8817ac1904a7e..1f8766c5ef748 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -71,7 +71,7 @@ cdef extern from "src/datetime/np_datetime.h": pandas_timedeltastruct *result ) nogil - int64_t hash_datetime_from_struct(npy_datetimestruct* dts) + Py_hash_t hash_datetime_from_struct(npy_datetimestruct* dts) except? -1 cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 87a92edd8a26d..e35d335bb0ce6 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -1116,7 +1116,8 @@ Py_uhash_t tuple_update_uhash(Py_uhash_t acc, Py_uhash_t lane) { } // https://github.com/pandas-dev/pandas/pull/50960 -Py_hash_t hash_datetime_from_struct(npy_datetimestruct* dts) { +Py_hash_t +hash_datetime_from_struct(npy_datetimestruct* dts) { /* * If we cannot cast to datetime, use the datetime struct values directly * and mix them similar to a tuple. From 037ba0598b482961be3e1613948925dcd3c6c2a2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Feb 2023 16:12:36 -0800 Subject: [PATCH 11/26] remove unnecessary casts --- pandas/_libs/src/klib/khash_python.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index ba80b656ae4dc..3d6eb917e33ee 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -338,8 +338,8 @@ Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { // either across resolutions or with standard library objects. // See also Timestamp.__hash__ - NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)((PyDatetimeScalarObject*)key)->obmeta.base; - npy_datetime value = ((PyDatetimeScalarObject*)key)->obval; + NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)key->obmeta.base; + npy_datetime value = key->obval; npy_datetimestruct dts; if (value == NPY_DATETIME_NAT) { From 704fb6942789347713a5aec9ec029e68dab480b9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Feb 2023 08:24:37 -0800 Subject: [PATCH 12/26] suggested edit for PyDateTime_IMPORT --- pandas/_libs/src/klib/khash_python.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 3d6eb917e33ee..aa64fbf31be79 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -352,7 +352,10 @@ Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { // we CAN cast to pydatetime, so use that hash to ensure we compare // as matching standard library datetimes (and pd.Timestamps) - PyDateTime_IMPORT; + if (PyDatetimeAPI == NULL) { + /* delayed import, may be nice to move to import time */ + PyDateTime_IMPORT; + } PyObject* dt; Py_hash_t hash; From f838953d07f3ce25df05288d0317374119eb7e60 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Feb 2023 12:59:58 -0800 Subject: [PATCH 13/26] revert delay --- pandas/_libs/src/klib/khash_python.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index aa64fbf31be79..3d6eb917e33ee 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -352,10 +352,7 @@ Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { // we CAN cast to pydatetime, so use that hash to ensure we compare // as matching standard library datetimes (and pd.Timestamps) - if (PyDatetimeAPI == NULL) { - /* delayed import, may be nice to move to import time */ - PyDateTime_IMPORT; - } + PyDateTime_IMPORT; PyObject* dt; Py_hash_t hash; From 95069e0acd43503e698f7c256ac5bbb78d421d38 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Feb 2023 19:41:08 -0800 Subject: [PATCH 14/26] restore check --- pandas/_libs/src/klib/khash_python.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 3d6eb917e33ee..0b3f6f8979452 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -352,7 +352,13 @@ Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { // we CAN cast to pydatetime, so use that hash to ensure we compare // as matching standard library datetimes (and pd.Timestamps) - PyDateTime_IMPORT; + if (PyDateTimeAPI == NULL) { + /* delayed import, may be nice to move to import time */ + PyDateTime_IMPORT; + if (PyDateTimeAPI == NULL) { + return -1; + } + } PyObject* dt; Py_hash_t hash; From c94609b706dac5603b722f5b5d4113a30142054e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Feb 2023 16:38:21 -0800 Subject: [PATCH 15/26] add test --- pandas/tests/test_algos.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b00b28f1e6033..e2b204dbac149 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1163,6 +1163,15 @@ def test_isin_unsigned_dtype(self): class TestValueCounts: + def test_value_counts_datetime64_mismatched_units(self): + # GH#50960 np.datetime64 objects with different units that are still equal + arr = np.array( + [np.datetime64(1, "ms"), np.datetime64(1000, "us")], dtype=object + ) + res = algos.value_counts(arr) + expected = Series([2], index=arr[:1], name="count") + tm.assert_series_equal(res, expected) + def test_value_counts(self): np.random.seed(1234) from pandas.core.reshape.tile import cut @@ -1619,6 +1628,14 @@ def test_unique_complex_numbers(self, array, expected): result = pd.unique(array) tm.assert_numpy_array_equal(result, expected) + def test_unique_datetime64_mismatched_units(self): + # GH#50960 np.datetime64 objects with different units that are still equal + arr = np.array( + [np.datetime64(1, "ms"), np.datetime64(1000, "us")], dtype=object + ) + res = pd.unique(arr) + tm.assert_numpy_array_equal(res, arr[:1]) + class TestHashTable: @pytest.mark.parametrize( From c55f182d758894ee33d98b93fc9c0c03f21c0ad0 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 11 Mar 2023 13:33:31 -0800 Subject: [PATCH 16/26] shot in the dark --- setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.py b/setup.py index b7e3b7c5cc153..db78fa7cbc044 100755 --- a/setup.py +++ b/setup.py @@ -494,6 +494,10 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", "pandas/_libs/src/pd_parser.h", + ] + + tseries_depends, + "sources": [ + "pandas/_libs/tslibs/src/datetime/np_datetime.c", ], }, "_libs.reduction": {"pyxfile": "_libs/reduction"}, From 23c2826902c7da410eddde9273571a232e47b874 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 12 Mar 2023 11:39:29 -0700 Subject: [PATCH 17/26] capsule stuff --- .../_libs/tslibs/src/datetime/pd_datetime.c | 1 + .../_libs/tslibs/src/datetime/pd_datetime.h | 1 + setup.py | 22 ++++--------------- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 73f63706f2a88..94a90a67f4299 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -53,6 +53,7 @@ static int pandas_datetime_exec(PyObject *module) { capi->get_datetime_iso_8601_strlen = get_datetime_iso_8601_strlen; capi->make_iso_8601_datetime = make_iso_8601_datetime; capi->make_iso_8601_timedelta = make_iso_8601_timedelta; + capi->hash_datetime_from_struct = hash_datetime_from_struct; PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, pandas_datetime_destructor); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index e80e9bbeb9e6c..26420707c5218 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -55,6 +55,7 @@ typedef struct { int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int, NPY_DATETIMEUNIT); int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); + Py_hash_t (*hash_datetime_from_struct)(npy_datetimestruct* dts); } PandasDateTime_CAPI; // The capsule name appears limited to module.attributename; see bpo-32414 diff --git a/setup.py b/setup.py index db78fa7cbc044..6ceb3605b9bb1 100755 --- a/setup.py +++ b/setup.py @@ -445,8 +445,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.algos": { "pyxfile": "_libs/algos", "include": klib_include, - "depends": _pxi_dep["algos"] + tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + "depends": _pxi_dep["algos"], }, "_libs.arrays": {"pyxfile": "_libs/arrays"}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, @@ -457,30 +456,21 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "depends": ( ["pandas/_libs/src/klib/khash_python.h", "pandas/_libs/src/klib/khash.h"] + _pxi_dep["hashtable"] - + tseries_depends ), - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.index": { "pyxfile": "_libs/index", "include": klib_include, - "depends": _pxi_dep["index"] + tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + "depends": _pxi_dep["index"], }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, "_libs.interval": { "pyxfile": "_libs/interval", "include": klib_include, - "depends": _pxi_dep["interval"] + tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], - }, - "_libs.join": { - "pyxfile": "_libs/join", - "depends": tseries_depends, - "include": klib_include, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + "depends": _pxi_dep["interval"], }, + "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, @@ -494,10 +484,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", "pandas/_libs/src/pd_parser.h", - ] - + tseries_depends, - "sources": [ - "pandas/_libs/tslibs/src/datetime/np_datetime.c", ], }, "_libs.reduction": {"pyxfile": "_libs/reduction"}, From 143b3a347dfdd392d436014996ed2c6ea6ee0845 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 12 Mar 2023 17:06:55 -0700 Subject: [PATCH 18/26] guessing --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 6ceb3605b9bb1..7f85047347687 100755 --- a/setup.py +++ b/setup.py @@ -445,7 +445,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.algos": { "pyxfile": "_libs/algos", "include": klib_include, - "depends": _pxi_dep["algos"], + "depends": _pxi_dep["algos"] + tseries_depends, }, "_libs.arrays": {"pyxfile": "_libs/arrays"}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, @@ -456,19 +456,20 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "depends": ( ["pandas/_libs/src/klib/khash_python.h", "pandas/_libs/src/klib/khash.h"] + _pxi_dep["hashtable"] + + tseries_depends ), }, "_libs.index": { "pyxfile": "_libs/index", "include": klib_include, - "depends": _pxi_dep["index"], + "depends": _pxi_dep["index"] + tseries_depends, }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, "_libs.interval": { "pyxfile": "_libs/interval", "include": klib_include, - "depends": _pxi_dep["interval"], + "depends": _pxi_dep["interval"] + tseries_depends, }, "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { From ffb836548169bc1fd335186f75e40192ed59a4ab Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 12 Mar 2023 18:16:35 -0700 Subject: [PATCH 19/26] still tryin --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 7f85047347687..88a0750e26dac 100755 --- a/setup.py +++ b/setup.py @@ -470,6 +470,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/interval", "include": klib_include, "depends": _pxi_dep["interval"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { From 5513721dafee91595aa73ada0014f57bb0e91756 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Mar 2023 09:53:50 -0700 Subject: [PATCH 20/26] macro --- pandas/_libs/tslibs/src/datetime/pd_datetime.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 26420707c5218..20e39e146fc16 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -108,6 +108,8 @@ static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; (base)) #define make_iso_8601_timedelta(tds, outstr, outlen) \ PandasDateTimeAPI->make_iso_8601_timedelta((tds), (outstr), (outlen)) +#define hash_datetime_from_struct(dts) \ + PandasDateTimeAPI->hash_datetime_from_struct((dts)) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus From 875d6af5871bea1eaa0f0c48ef10a54496e23372 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Mar 2023 10:26:08 -0700 Subject: [PATCH 21/26] revert sources --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 88a0750e26dac..7f85047347687 100755 --- a/setup.py +++ b/setup.py @@ -470,7 +470,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/interval", "include": klib_include, "depends": _pxi_dep["interval"] + tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { From 40e6e170c5fdccd9fffa76c92898140545719d4f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Mar 2023 12:57:39 -0700 Subject: [PATCH 22/26] Move np_datetime64_object_hash to np_datetime.c --- pandas/_libs/src/klib/khash_python.h | 46 ------------------ .../_libs/tslibs/src/datetime/np_datetime.c | 48 ++++++++++++++++++- .../_libs/tslibs/src/datetime/np_datetime.h | 2 + .../_libs/tslibs/src/datetime/pd_datetime.c | 1 + .../_libs/tslibs/src/datetime/pd_datetime.h | 4 ++ 5 files changed, 54 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index 0b3f6f8979452..d70005be918ee 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -332,52 +332,6 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { } -// TODO: same thing for timedelta64 objects -Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { - // GH#50690 numpy's hash implementation does not preserve comparabity - // either across resolutions or with standard library objects. - // See also Timestamp.__hash__ - - NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)key->obmeta.base; - npy_datetime value = key->obval; - npy_datetimestruct dts; - - if (value == NPY_DATETIME_NAT) { - // np.datetime64("NaT") in any reso - return NPY_DATETIME_NAT; - } - - pandas_datetime_to_datetimestruct(value, unit, &dts); - - if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { - // we CAN cast to pydatetime, so use that hash to ensure we compare - // as matching standard library datetimes (and pd.Timestamps) - if (PyDateTimeAPI == NULL) { - /* delayed import, may be nice to move to import time */ - PyDateTime_IMPORT; - if (PyDateTimeAPI == NULL) { - return -1; - } - } - - PyObject* dt; - Py_hash_t hash; - - dt = PyDateTime_FromDateAndTime( - dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us - ); - if (dt == NULL) { - return -1; - } - hash = PyObject_Hash(dt); - Py_DECREF(dt); - return hash; - } - - return hash_datetime_from_struct(&dts); -} - - khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) { Py_hash_t hash; // For PyObject_Hash holds: diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index b8f555dc97fdc..3c656b48a1152 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -25,8 +25,9 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include #include #include -#include "np_datetime.h" +#include "np_datetime.h" +#include "datetime.h" const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, @@ -1086,3 +1087,48 @@ hash_datetime_from_struct(npy_datetimestruct* dts) { } return acc; } + + +// TODO(jbrockmendel): same thing for timedelta64 objects +Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key) { + // GH#50690 numpy's hash implementation does not preserve comparabity + // either across resolutions or with standard library objects. + // See also Timestamp.__hash__ + + NPY_DATETIMEUNIT unit = (NPY_DATETIMEUNIT)key->obmeta.base; + npy_datetime value = key->obval; + npy_datetimestruct dts; + + if (value == NPY_DATETIME_NAT) { + // np.datetime64("NaT") in any reso + return NPY_DATETIME_NAT; + } + + pandas_datetime_to_datetimestruct(value, unit, &dts); + + if ((dts.year > 0) && (dts.year <= 9999) && (dts.ps == 0) && (dts.as == 0)) { + // we CAN cast to pydatetime, so use that hash to ensure we compare + // as matching standard library datetimes (and pd.Timestamps) + if (PyDateTimeAPI == NULL) { + /* delayed import, may be nice to move to import time */ + PyDateTime_IMPORT; + if (PyDateTimeAPI == NULL) { + return -1; + } + } + + PyObject* dt; + Py_hash_t hash; + + dt = PyDateTime_FromDateAndTime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us); + if (dt == NULL) { + return -1; + } + hash = PyObject_Hash(dt); + Py_DECREF(dt); + return hash; + } + + return hash_datetime_from_struct(&dts); +} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 7ed4efebcfbd7..0b11c813a3a71 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -22,6 +22,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #endif // NPY_NO_DEPRECATED_API #include +#include typedef struct { npy_int64 days; @@ -116,6 +117,7 @@ PyArray_DatetimeMetaData get_datetime_metadata_from_dtype( PyArray_Descr *dtype); +Py_hash_t np_datetime64_object_hash(PyDatetimeScalarObject* key); Py_hash_t hash_datetime_from_struct(npy_datetimestruct* dts); Py_uhash_t tuple_update_uhash(Py_uhash_t acc, Py_uhash_t lane); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 94a90a67f4299..41919b07219ec 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -54,6 +54,7 @@ static int pandas_datetime_exec(PyObject *module) { capi->make_iso_8601_datetime = make_iso_8601_datetime; capi->make_iso_8601_timedelta = make_iso_8601_timedelta; capi->hash_datetime_from_struct = hash_datetime_from_struct; + capi->np_datetime64_object_hash = np_datetime64_object_hash; PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, pandas_datetime_destructor); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 20e39e146fc16..352f6432f155d 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -23,6 +23,7 @@ See NUMPY_LICENSE.txt for the license. #endif // NPY_NO_DEPRECATED_API #include +#include #include "np_datetime.h" #include "np_datetime_strings.h" #include "date_conversions.h" @@ -56,6 +57,7 @@ typedef struct { NPY_DATETIMEUNIT); int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); Py_hash_t (*hash_datetime_from_struct)(npy_datetimestruct* dts); + Py_hash_t (*np_datetime64_object_hash)(PyDatetimeScalarObject* key); } PandasDateTime_CAPI; // The capsule name appears limited to module.attributename; see bpo-32414 @@ -110,6 +112,8 @@ static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; PandasDateTimeAPI->make_iso_8601_timedelta((tds), (outstr), (outlen)) #define hash_datetime_from_struct(dts) \ PandasDateTimeAPI->hash_datetime_from_struct((dts)) +#define np_datetime64_object_hash(dts) \ + PandasDateTimeAPI->np_datetime64_object_hash((key)) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus From 15a701c3c8b2929e02a4cf4a5720fb1bcc374c6e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Mar 2023 14:43:08 -0700 Subject: [PATCH 23/26] import_pandas_datetime more --- pandas/_libs/algos.pyx | 3 +++ pandas/_libs/hashtable.pyx | 3 +++ pandas/_libs/join.pyx | 3 +++ 3 files changed, 9 insertions(+) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index adb920e0cca6d..ca8298153105b 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -51,10 +51,13 @@ from pandas._libs.khash cimport ( kh_resize_int64, khiter_t, ) +from pandas._libs.tslibs.np_datetime cimport import_pandas_datetime from pandas._libs.util cimport get_nat import pandas._libs.missing as missing +import_pandas_datetime() + cdef: float64_t FP_ERR = 1e-13 float64_t NaN = np.NaN diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index ccac3d0b50d45..7d453f573b330 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -34,6 +34,9 @@ from pandas._libs.khash cimport ( khiter_t, ) from pandas._libs.missing cimport checknull +from pandas._libs.tslibs.np_datetime cimport import_pandas_datetime + +import_pandas_datetime() def get_hashtable_trace_domain(): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 2b3b147470cef..b83d34f2fac51 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -18,6 +18,9 @@ from pandas._libs.dtypes cimport ( numeric_object_t, numeric_t, ) +from pandas._libs.tslibs.np_datetime cimport import_pandas_datetime + +import_pandas_datetime() @cython.wraparound(False) From af25f40d46eae55a5c9217bab5f18d81d4dc3d33 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Mar 2023 14:52:58 -0700 Subject: [PATCH 24/26] troubleshoot --- pandas/_libs/tslibs/src/datetime/pd_datetime.c | 1 + pandas/_libs/tslibs/src/datetime/pd_datetime.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 41919b07219ec..643a2c4da90d6 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -55,6 +55,7 @@ static int pandas_datetime_exec(PyObject *module) { capi->make_iso_8601_timedelta = make_iso_8601_timedelta; capi->hash_datetime_from_struct = hash_datetime_from_struct; capi->np_datetime64_object_hash = np_datetime64_object_hash; + capi->tuple_update_uhash = tuple_update_uhash; PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, pandas_datetime_destructor); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 352f6432f155d..23e51d1ee6939 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -58,6 +58,7 @@ typedef struct { int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); Py_hash_t (*hash_datetime_from_struct)(npy_datetimestruct* dts); Py_hash_t (*np_datetime64_object_hash)(PyDatetimeScalarObject* key); + Py_uhash_t tuple_update_uhash(Py_uhash_t acc, Py_uhash_t lane); } PandasDateTime_CAPI; // The capsule name appears limited to module.attributename; see bpo-32414 @@ -114,6 +115,8 @@ static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; PandasDateTimeAPI->hash_datetime_from_struct((dts)) #define np_datetime64_object_hash(dts) \ PandasDateTimeAPI->np_datetime64_object_hash((key)) +#define tuple_update_uhash(acc, lane) \ + PandasDateTimeAPI->tuple_update_uhash((acc), (lane)) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus From bd7d4325042d5e1edfa4e696cc1a81dda8850fc4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Mar 2023 16:35:55 -0700 Subject: [PATCH 25/26] post-merge merges --- pandas/_libs/algos.pyx | 3 - pandas/_libs/hashtable.pyx | 3 - pandas/_libs/join.pyx | 3 - .../_libs/tslibs/src/datetime/pd_datetime.c | 101 -------------- .../_libs/tslibs/src/datetime/pd_datetime.h | 125 ------------------ setup.py | 10 +- 6 files changed, 9 insertions(+), 236 deletions(-) delete mode 100644 pandas/_libs/tslibs/src/datetime/pd_datetime.c delete mode 100644 pandas/_libs/tslibs/src/datetime/pd_datetime.h diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ca8298153105b..adb920e0cca6d 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -51,13 +51,10 @@ from pandas._libs.khash cimport ( kh_resize_int64, khiter_t, ) -from pandas._libs.tslibs.np_datetime cimport import_pandas_datetime from pandas._libs.util cimport get_nat import pandas._libs.missing as missing -import_pandas_datetime() - cdef: float64_t FP_ERR = 1e-13 float64_t NaN = np.NaN diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 7d453f573b330..ccac3d0b50d45 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -34,9 +34,6 @@ from pandas._libs.khash cimport ( khiter_t, ) from pandas._libs.missing cimport checknull -from pandas._libs.tslibs.np_datetime cimport import_pandas_datetime - -import_pandas_datetime() def get_hashtable_trace_domain(): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index b83d34f2fac51..2b3b147470cef 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -18,9 +18,6 @@ from pandas._libs.dtypes cimport ( numeric_object_t, numeric_t, ) -from pandas._libs.tslibs.np_datetime cimport import_pandas_datetime - -import_pandas_datetime() @cython.wraparound(False) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c deleted file mode 100644 index 643a2c4da90d6..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt - -*/ - -#define _PANDAS_DATETIME_IMPL - -#define PY_SSIZE_T_CLEAN -#include - -#include "datetime.h" -#include "pd_datetime.h" - - -static void pandas_datetime_destructor(PyObject *op) { - void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); - PyMem_Free(ptr); -} - -static int pandas_datetime_exec(PyObject *module) { - PyDateTime_IMPORT; - PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI)); - if (capi == NULL) { - PyErr_NoMemory(); - return -1; - } - capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; - capi->scaleNanosecToUnit = scaleNanosecToUnit; - capi->int64ToIso = int64ToIso; - capi->NpyDateTimeToEpoch = NpyDateTimeToEpoch; - capi->PyDateTimeToIso = PyDateTimeToIso; - capi->PyDateTimeToEpoch = PyDateTimeToEpoch; - capi->int64ToIsoDuration = int64ToIsoDuration; - capi->pandas_datetime_to_datetimestruct = pandas_datetime_to_datetimestruct; - capi->pandas_timedelta_to_timedeltastruct = - pandas_timedelta_to_timedeltastruct; - capi->convert_pydatetime_to_datetimestruct = - convert_pydatetime_to_datetimestruct; - capi->cmp_npy_datetimestruct = cmp_npy_datetimestruct; - capi->get_datetime_metadata_from_dtype = get_datetime_metadata_from_dtype; - capi->parse_iso_8601_datetime = parse_iso_8601_datetime; - capi->get_datetime_iso_8601_strlen = get_datetime_iso_8601_strlen; - capi->make_iso_8601_datetime = make_iso_8601_datetime; - capi->make_iso_8601_timedelta = make_iso_8601_timedelta; - capi->hash_datetime_from_struct = hash_datetime_from_struct; - capi->np_datetime64_object_hash = np_datetime64_object_hash; - capi->tuple_update_uhash = tuple_update_uhash; - - PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, - pandas_datetime_destructor); - if (capsule == NULL) { - PyMem_Free(capi); - return -1; - } - - // Monkeypatch the top level pandas module to have an attribute for the - // C-API. This is required because Python capsules do not support setting - // this attribute on anything but the top level package. Ideally not - // done when cpython gh-6898 gets implemented - PyObject *pandas = PyImport_ImportModule("pandas"); - if (!pandas) { - PyErr_SetString(PyExc_ImportError, - "pd_datetime.c could not import module pandas"); - Py_DECREF(capsule); - return -1; - } - - if (PyModule_AddObject(pandas, "_pandas_datetime_CAPI", capsule) < 0) { - Py_DECREF(capsule); - return -1; - } - - return 0; -} - -static PyModuleDef_Slot pandas_datetime_slots[] = { - {Py_mod_exec, pandas_datetime_exec}, {0, NULL}}; - -static struct PyModuleDef pandas_datetimemodule = { - PyModuleDef_HEAD_INIT, - .m_name = "pandas._libs.pandas_datetime", - - .m_doc = "Internal module with datetime support for other extensions", - .m_size = 0, - .m_methods = NULL, - .m_slots = pandas_datetime_slots}; - -PyMODINIT_FUNC PyInit_pandas_datetime(void) { - return PyModuleDef_Init(&pandas_datetimemodule); -} diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h deleted file mode 100644 index 23e51d1ee6939..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -Written by Mark Wiebe (mwwiebe@gmail.com) - -Copyright (c) 2011 by Enthought, Inc. -Copyright (c) 2005-2011, NumPy Developers - -All rights reserved. -See NUMPY_LICENSE.txt for the license. -*/ - -#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ -#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -#include -#include -#include "np_datetime.h" -#include "np_datetime_strings.h" -#include "date_conversions.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, - const npy_datetimestruct *); - int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT); - char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *); - npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT); - char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *); - npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT); - char *(*int64ToIsoDuration)(int64_t, size_t *); - void (*pandas_datetime_to_datetimestruct)(npy_datetime, NPY_DATETIMEUNIT, - npy_datetimestruct *); - void (*pandas_timedelta_to_timedeltastruct)(npy_datetime, NPY_DATETIMEUNIT, - pandas_timedeltastruct *); - int (*convert_pydatetime_to_datetimestruct)(PyObject *, npy_datetimestruct *); - int (*cmp_npy_datetimestruct)(const npy_datetimestruct *, - const npy_datetimestruct *); - PyArray_DatetimeMetaData (*get_datetime_metadata_from_dtype)(PyArray_Descr *); - int (*parse_iso_8601_datetime)(const char *, int, int, npy_datetimestruct *, - NPY_DATETIMEUNIT *, int *, int *, const char *, - int, FormatRequirement); - int (*get_datetime_iso_8601_strlen)(int, NPY_DATETIMEUNIT); - int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int, - NPY_DATETIMEUNIT); - int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); - Py_hash_t (*hash_datetime_from_struct)(npy_datetimestruct* dts); - Py_hash_t (*np_datetime64_object_hash)(PyDatetimeScalarObject* key); - Py_uhash_t tuple_update_uhash(Py_uhash_t acc, Py_uhash_t lane); -} PandasDateTime_CAPI; - -// The capsule name appears limited to module.attributename; see bpo-32414 -// cpython has an open PR gh-6898 to fix, but hasn't had traction for years -#define PandasDateTime_CAPSULE_NAME "pandas._pandas_datetime_CAPI" - -/* block used as part of public API */ -#ifndef _PANDAS_DATETIME_IMPL -static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; - -#define PandasDateTime_IMPORT \ - PandasDateTimeAPI = \ - (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) - -#define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ - PandasDateTimeAPI->npy_datetimestruct_to_datetime((NPY_DATETIMEUNIT), \ - (npy_datetimestruct)) -#define scaleNanosecToUnit(value, unit) \ - PandasDateTimeAPI->scaleNanosecToUnit((value), (unit)) -#define int64ToIso(value, base, len) \ - PandasDateTimeAPI->int64ToIso((value), (base), (len)) -#define NpyDateTimeToEpoch(dt, base) \ - PandasDateTimeAPI->NpyDateTimeToEpoch((dt), (base)) -#define PyDateTimeToIso(obj, base, len) \ - PandasDateTimeAPI->PyDateTimeToIso((obj), (base), (len)) -#define PyDateTimeToEpoch(dt, base) \ - PandasDateTimeAPI->PyDateTimeToEpoch((dt), (base)) -#define int64ToIsoDuration(value, len) \ - PandasDateTimeAPI->int64ToIsoDuration((value), (len)) -#define pandas_datetime_to_datetimestruct(dt, base, out) \ - PandasDateTimeAPI->pandas_datetime_to_datetimestruct((dt), (base), (out)) -#define pandas_timedelta_to_timedeltastruct(td, base, out) \ - PandasDateTimeAPI->pandas_timedelta_to_timedeltastruct((td), (base), (out)) -#define convert_pydatetime_to_datetimestruct(dtobj, out) \ - PandasDateTimeAPI->convert_pydatetime_to_datetimestruct((dtobj), (out)) -#define cmp_npy_datetimestruct(a, b) \ - PandasDateTimeAPI->cmp_npy_datetimestruct((a), (b)) -#define get_datetime_metadata_from_dtype(dtype) \ - PandasDateTimeAPI->get_datetime_metadata_from_dtype((dtype)) -#define parse_iso_8601_datetime(str, len, want_exc, out, out_bestunit, \ - out_local, out_tzoffset, format, format_len, \ - format_requirement) \ - PandasDateTimeAPI->parse_iso_8601_datetime( \ - (str), (len), (want_exc), (out), (out_bestunit), (out_local), \ - (out_tzoffset), (format), (format_len), (format_requirement)) -#define get_datetime_iso_8601_strlen(local, base) \ - PandasDateTimeAPI->get_datetime_iso_8601_strlen((local), (base)) -#define make_iso_8601_datetime(dts, outstr, outlen, utc, base) \ - PandasDateTimeAPI->make_iso_8601_datetime((dts), (outstr), (outlen), (utc), \ - (base)) -#define make_iso_8601_timedelta(tds, outstr, outlen) \ - PandasDateTimeAPI->make_iso_8601_timedelta((tds), (outstr), (outlen)) -#define hash_datetime_from_struct(dts) \ - PandasDateTimeAPI->hash_datetime_from_struct((dts)) -#define np_datetime64_object_hash(dts) \ - PandasDateTimeAPI->np_datetime64_object_hash((key)) -#define tuple_update_uhash(acc, lane) \ - PandasDateTimeAPI->tuple_update_uhash((acc), (lane)) -#endif /* !defined(_PANDAS_DATETIME_IMPL) */ - -#ifdef __cplusplus -} -#endif -#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ diff --git a/setup.py b/setup.py index 5dd566a4d850a..abf87f6951ac2 100755 --- a/setup.py +++ b/setup.py @@ -444,6 +444,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/algos", "include": klib_include, "depends": _pxi_dep["algos"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.arrays": {"pyxfile": "_libs/arrays"}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, @@ -456,11 +457,13 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): + _pxi_dep["hashtable"] + tseries_depends ), + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.index": { "pyxfile": "_libs/index", "include": klib_include, "depends": _pxi_dep["index"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, @@ -468,8 +471,13 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/interval", "include": klib_include, "depends": _pxi_dep["interval"] + tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + }, + "_libs.join": { + "pyxfile": "_libs/join", + "include": klib_include, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, - "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, From 394d86e69b23098391cf9b5fa61c7976586809fb Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Mar 2023 16:46:05 -0700 Subject: [PATCH 26/26] frickin guess --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index abf87f6951ac2..17a30889f6c64 100755 --- a/setup.py +++ b/setup.py @@ -463,7 +463,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/index", "include": klib_include, "depends": _pxi_dep["index"] + tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, @@ -471,7 +470,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/interval", "include": klib_include, "depends": _pxi_dep["interval"] + tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.join": { "pyxfile": "_libs/join",