From 8798d32a9a1315b5dc87b4b8f75481da51dca7ea Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Dec 2019 16:51:49 -0800 Subject: [PATCH 1/5] Stopping at parsers --- pandas/io/msgpack/_packer.pyx | 2 +- pandas/io/msgpack/_unpacker.pyx | 2 +- setup.py | 59 ++++++++++++++------------------- 3 files changed, 26 insertions(+), 37 deletions(-) diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx index 5c0499b489110..e86099740cc74 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -19,7 +19,7 @@ from pandas.io.msgpack import ExtType import numpy as np -cdef extern from "../../src/msgpack/pack.h": +cdef extern from "../../_libs/src/msgpack/pack.h": struct msgpack_packer: char* buf size_t length diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index f1817f29bd42a..67579b0cd23fb 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -21,7 +21,7 @@ from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, from pandas.io.msgpack import ExtType -cdef extern from "../../src/msgpack/unpack.h": +cdef extern from "../../_libs/src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook diff --git a/setup.py b/setup.py index e6a95d4e7afd8..eaca968f4bb88 100755 --- a/setup.py +++ b/setup.py @@ -538,12 +538,10 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): return pjoin("pandas", subdir, name + suffix) -common_include = ["pandas/_libs/src/klib", "pandas/_libs/src"] -ts_include = ["pandas/_libs/tslibs/src", "pandas/_libs/tslibs"] - - lib_depends = ["pandas/_libs/src/parse_helper.h"] +klib_include = ["pandas/_libs/src/klib"] + np_datetime_headers = [ "pandas/_libs/tslibs/src/datetime/np_datetime.h", "pandas/_libs/tslibs/src/datetime/np_datetime_strings.h", @@ -557,36 +555,41 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ext_data = { - "_libs.algos": {"pyxfile": "_libs/algos", "depends": _pxi_dep["algos"]}, + "_libs.algos": {"pyxfile": "_libs/algos", + "include": klib_include, + "depends": _pxi_dep["algos"]}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, - "_libs.hashing": {"pyxfile": "_libs/hashing", "include": [], "depends": []}, - "_libs.hashtable": { - "pyxfile": "_libs/hashtable", - "depends": (["pandas/_libs/src/klib/khash_python.h"] + _pxi_dep["hashtable"]), - }, + "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []}, "_libs.index": { "pyxfile": "_libs/index", - "include": common_include + ts_include, + "include": klib_include, # due to Hashtable import "depends": _pxi_dep["index"], "sources": np_datetime_sources, }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, - "_libs.interval": {"pyxfile": "_libs/interval", "depends": _pxi_dep["interval"]}, - "_libs.join": {"pyxfile": "_libs/join"}, + "_libs.interval": { + "pyxfile": "_libs/interval", + "include": klib_include, # due to Hashtable import + "depends": _pxi_dep["interval"] + }, + "_libs.join": { + "pyxfile": "_libs/join", + "include": klib_include # due to Hashtable import + }, "_libs.lib": { "pyxfile": "_libs/lib", - "include": common_include + ts_include, "depends": lib_depends + tseries_depends, + "include": klib_include, # due to tokenizer import "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, "_libs.missing": { "pyxfile": "_libs/missing", - "include": common_include + ts_include, "depends": tseries_depends, }, "_libs.parsers": { "pyxfile": "_libs/parsers", + "include": klib_include + ["pandas/_libs/src/headers"], # portable.h "depends": [ "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", @@ -598,45 +601,39 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.reduction": {"pyxfile": "_libs/reduction"}, "_libs.ops": {"pyxfile": "_libs/ops"}, - "_libs.properties": {"pyxfile": "_libs/properties", "include": []}, + "_libs.properties": {"pyxfile": "_libs/properties"}, "_libs.reshape": {"pyxfile": "_libs/reshape", "depends": []}, "_libs.sparse": {"pyxfile": "_libs/sparse", "depends": _pxi_dep["sparse"]}, "_libs.tslib": { "pyxfile": "_libs/tslib", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, "_libs.tslibs.c_timestamp": { "pyxfile": "_libs/tslibs/c_timestamp", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, - "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar", "include": []}, + "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, - "_libs.tslibs.frequencies": {"pyxfile": "_libs/tslibs/frequencies", "include": []}, - "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype", "include": []}, + "_libs.tslibs.frequencies": {"pyxfile": "_libs/tslibs/frequencies"}, + "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"}, "_libs.tslibs.np_datetime": { "pyxfile": "_libs/tslibs/np_datetime", - "include": ts_include, "depends": np_datetime_headers, "sources": np_datetime_sources, }, "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, @@ -647,38 +644,32 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.tslibs.period": { "pyxfile": "_libs/tslibs/period", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, "_libs.tslibs.resolution": { "pyxfile": "_libs/tslibs/resolution", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, "_libs.tslibs.strptime": { "pyxfile": "_libs/tslibs/strptime", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, "_libs.tslibs.timedeltas": { "pyxfile": "_libs/tslibs/timedeltas", - "include": ts_include, "depends": np_datetime_headers, "sources": np_datetime_sources, }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, - "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones", "include": []}, + "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, "_libs.tslibs.tzconversion": { "pyxfile": "_libs/tslibs/tzconversion", - "include": ts_include, "depends": tseries_depends, "sources": np_datetime_sources, }, @@ -697,7 +688,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/msgpack/pack.h", "pandas/_libs/src/msgpack/pack_template.h", ], - "include": ["pandas/_libs/src/msgpack"] + common_include, "language": "c++", "suffix": ".cpp", "pyxfile": "io/msgpack/_packer", @@ -710,7 +700,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/msgpack/unpack_template.h", ], "macros": endian_macro + macros, - "include": ["pandas/_libs/src/msgpack"] + common_include, "language": "c++", "suffix": ".cpp", "pyxfile": "io/msgpack/_unpacker", @@ -727,7 +716,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): sources.extend(data.get("sources", [])) - include = data.get("include", common_include) + include = data.get("include") obj = Extension( "pandas.{name}".format(name=name), From 917386e9c646ca73e9272182aa1bee5795ce32e7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Dec 2019 20:48:57 -0800 Subject: [PATCH 2/5] cleanups --- pandas/io/msgpack/_packer.pyx | 2 +- pandas/io/msgpack/_unpacker.pyx | 2 +- setup.py | 15 +++++++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx index e86099740cc74..a6f7da1d10b35 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -19,7 +19,7 @@ from pandas.io.msgpack import ExtType import numpy as np -cdef extern from "../../_libs/src/msgpack/pack.h": +cdef extern from "pandas/_libs/src/msgpack/pack.h": struct msgpack_packer: char* buf size_t length diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index 67579b0cd23fb..9c11404c37657 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -21,7 +21,7 @@ from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, from pandas.io.msgpack import ExtType -cdef extern from "../../_libs/src/msgpack/unpack.h": +cdef extern from "pandas/_libs/src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook diff --git a/setup.py b/setup.py index eaca968f4bb88..4f6032ff9b927 100755 --- a/setup.py +++ b/setup.py @@ -560,9 +560,14 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "depends": _pxi_dep["algos"]}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []}, + "_libs.hashtable": { + "pyxfile": "_libs/hashtable", + "include": klib_include, + "depends": (["pandas/_libs/src/klib/khash_python.h"] + _pxi_dep["hashtable"]), + }, "_libs.index": { "pyxfile": "_libs/index", - "include": klib_include, # due to Hashtable import + "include": klib_include, "depends": _pxi_dep["index"], "sources": np_datetime_sources, }, @@ -570,12 +575,12 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.internals": {"pyxfile": "_libs/internals"}, "_libs.interval": { "pyxfile": "_libs/interval", - "include": klib_include, # due to Hashtable import + "include": klib_include, "depends": _pxi_dep["interval"] }, "_libs.join": { "pyxfile": "_libs/join", - "include": klib_include # due to Hashtable import + "include": klib_include }, "_libs.lib": { "pyxfile": "_libs/lib", @@ -589,7 +594,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.parsers": { "pyxfile": "_libs/parsers", - "include": klib_include + ["pandas/_libs/src/headers"], # portable.h + "include": klib_include + ["pandas/_libs/src"], "depends": [ "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", @@ -639,6 +644,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.tslibs.parsing": { "pyxfile": "_libs/tslibs/parsing", + "include": klib_include, "depends": ["pandas/_libs/src/parser/tokenizer.h"], "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, @@ -699,6 +705,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/msgpack/unpack_define.h", "pandas/_libs/src/msgpack/unpack_template.h", ], + "include": ["pandas/_libs/src"], "macros": endian_macro + macros, "language": "c++", "suffix": ".cpp", From 93103dc5002f5d155eac260f84bcfc4a01e4ddc8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Dec 2019 20:49:25 -0800 Subject: [PATCH 3/5] black --- setup.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index 4f6032ff9b927..f38bb4c1146dd 100755 --- a/setup.py +++ b/setup.py @@ -555,9 +555,11 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ext_data = { - "_libs.algos": {"pyxfile": "_libs/algos", - "include": klib_include, - "depends": _pxi_dep["algos"]}, + "_libs.algos": { + "pyxfile": "_libs/algos", + "include": klib_include, + "depends": _pxi_dep["algos"], + }, "_libs.groupby": {"pyxfile": "_libs/groupby"}, "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []}, "_libs.hashtable": { @@ -576,22 +578,16 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.interval": { "pyxfile": "_libs/interval", "include": klib_include, - "depends": _pxi_dep["interval"] - }, - "_libs.join": { - "pyxfile": "_libs/join", - "include": klib_include + "depends": _pxi_dep["interval"], }, + "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, "include": klib_include, # due to tokenizer import "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, - "_libs.missing": { - "pyxfile": "_libs/missing", - "depends": tseries_depends, - }, + "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends,}, "_libs.parsers": { "pyxfile": "_libs/parsers", "include": klib_include + ["pandas/_libs/src"], @@ -683,7 +679,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.window.aggregations": { "pyxfile": "_libs/window/aggregations", "language": "c++", - "suffix": ".cpp" + "suffix": ".cpp", }, "_libs.window.indexers": {"pyxfile": "_libs/window/indexers"}, "_libs.writers": {"pyxfile": "_libs/writers"}, @@ -705,7 +701,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/msgpack/unpack_define.h", "pandas/_libs/src/msgpack/unpack_template.h", ], - "include": ["pandas/_libs/src"], + "include": ["pandas/_libs/src"], "macros": endian_macro + macros, "language": "c++", "suffix": ".cpp", From 51ea259316710adfd22b3f7d708a6bdbe875e16d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Dec 2019 20:57:04 -0800 Subject: [PATCH 4/5] msgpack fix --- pandas/io/msgpack/_packer.pyx | 2 +- pandas/io/msgpack/_unpacker.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx index a6f7da1d10b35..e86099740cc74 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -19,7 +19,7 @@ from pandas.io.msgpack import ExtType import numpy as np -cdef extern from "pandas/_libs/src/msgpack/pack.h": +cdef extern from "../../_libs/src/msgpack/pack.h": struct msgpack_packer: char* buf size_t length diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index 9c11404c37657..67579b0cd23fb 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -21,7 +21,7 @@ from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, from pandas.io.msgpack import ExtType -cdef extern from "pandas/_libs/src/msgpack/unpack.h": +cdef extern from "../../_libs/src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook From aab348a815ba34e6ac3b7b473d1e997e25cd5e93 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Dec 2019 21:16:19 -0800 Subject: [PATCH 5/5] flake8 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f38bb4c1146dd..1d7a9b8801c2f 100755 --- a/setup.py +++ b/setup.py @@ -587,7 +587,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "include": klib_include, # due to tokenizer import "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, - "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends,}, + "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends}, "_libs.parsers": { "pyxfile": "_libs/parsers", "include": klib_include + ["pandas/_libs/src"],