Skip to content

Refactor Extension Modules #53346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,7 @@ repos:
rev: 1.6.1
hooks:
- id: cpplint
# We don't lint all C files because we don't want to lint any that are built
# from Cython files nor do we want to lint C files that we didn't modify for
# this particular codebase (e.g. src/headers, src/klib). However,
# we can lint all header files since they aren't "generated" like C files are.
exclude: ^pandas/_libs/src/(klib|headers)/
exclude: ^pandas/_libs/include/pandas/vendored/klib
args: [
--quiet,
'--extensions=c,h',
Expand Down
4 changes: 1 addition & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,4 @@ prune pandas/tests/io/parser/data

# Selectively re-add *.cxx files that were excluded above
graft pandas/_libs/src
graft pandas/_libs/tslibs/src
include pandas/_libs/pd_parser.h
include pandas/_libs/pd_parser.c
graft pandas/_libs/include
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ See NUMPY_LICENSE.txt for the license.
#endif // NPY_NO_DEPRECATED_API

#include <numpy/ndarraytypes.h>
#include "np_datetime.h"
#include "np_datetime_strings.h"
#include "date_conversions.h"
#include "pandas/vendored/numpy/datetime/np_datetime.h"
#include "pandas/vendored/numpy/datetime/np_datetime_strings.h"
#include "pandas/datetime/date_conversions.h"

#ifdef __cplusplus
extern "C" {
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extern "C" {

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "src/parser/tokenizer.h"
#include "pandas/parser/tokenizer.h"

typedef struct {
int (*to_double)(char *, double *, char, char, int *);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ See LICENSE for the license
#define ERROR_INVALID_CHARS 3

#include <stdint.h>
#include "../inline_helper.h"
#include "../headers/portable.h"
#include "pandas/inline_helper.h"
#include "pandas/portable.h"

#include "khash.h"
#include "pandas/vendored/klib/khash.h"

#define STREAM_INIT_SIZE 32

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

#pragma once

#include <string.h>

#if defined(_MSC_VER)
#define strcasecmp( s1, s2 ) _stricmp( s1, s2 )
#define strcasecmp(s1, s2) _stricmp(s1, s2)
#endif

// GH-23516 - works around locale perf issues
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "inline_helper.h"
#include "pandas/inline_helper.h"

PANDAS_INLINE float __skiplist_nanf(void) {
const union {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int main() {
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "../inline_helper.h"
#include "pandas/inline_helper.h"


// hooks for memory allocator, C-runtime allocator used per default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ tree doesn't have cyclic references.

#include <stdio.h>
#include <wchar.h>
#include "../../headers/portable.h"
#include "pandas/portable.h"

// Don't output any extra whitespaces when encoding
#define JSON_NO_EXTRA_WHITESPACE
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/khash.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ from numpy cimport (
)


cdef extern from "khash_python.h":
cdef extern from "pandas/vendored/klib/khash_python.h":
const int KHASH_TRACE_DOMAIN

ctypedef uint32_t khuint_t
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/khash_for_primitive_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ primitive_types = [('int64', 'int64_t'),

{{for name, c_type in primitive_types}}

cdef extern from "khash_python.h":
cdef extern from "pandas/vendored/klib/khash_python.h":
ctypedef struct kh_{{name}}_t:
khuint_t n_buckets, size, n_occupied, upper_bound
uint32_t *flags
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ cdef extern from "numpy/arrayobject.h":
cdef extern from "numpy/ndarrayobject.h":
bint PyArray_CheckScalar(obj) nogil

cdef extern from "pd_parser.h":
cdef extern from "pandas/parser/pd_parser.h":
int floatify(object, float64_t *result, int *maybe_int) except -1
void PandasParser_IMPORT()

Expand Down
48 changes: 18 additions & 30 deletions pandas/_libs/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -61,53 +61,41 @@ subdir('tslibs')
libs_sources = {
# Dict of extension name -> dict of {sources, include_dirs, and deps}
# numpy include dir is implicitly included
'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper],
'include_dirs': klib_include},
'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper]},
'arrays': {'sources': ['arrays.pyx']},
'groupby': {'sources': ['groupby.pyx']},
'hashing': {'sources': ['hashing.pyx']},
'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper],
'include_dirs': klib_include},
'index': {'sources': ['index.pyx', _index_class_helper],
'include_dirs': [klib_include, 'tslibs']},
'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper]},
'index': {'sources': ['index.pyx', _index_class_helper]},
'indexing': {'sources': ['indexing.pyx']},
'internals': {'sources': ['internals.pyx']},
'interval': {'sources': ['interval.pyx', _intervaltree_helper],
'include_dirs': [klib_include, 'tslibs']},
'interval': {'sources': ['interval.pyx', _intervaltree_helper]},
'join': {'sources': ['join.pyx', _khash_primitive_helper],
'include_dirs': klib_include,
'deps': _khash_primitive_helper_dep},
'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c'],
'include_dirs': [klib_include, inc_datetime]},
'missing': {'sources': ['missing.pyx'],
'include_dirs': [inc_datetime]},
'pandas_datetime': {'sources': ['tslibs/src/datetime/np_datetime.c',
'tslibs/src/datetime/np_datetime_strings.c',
'tslibs/src/datetime/date_conversions.c',
'tslibs/src/datetime/pd_datetime.c']},
#'include_dirs':
'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']},
'missing': {'sources': ['missing.pyx']},
'pandas_datetime': {'sources': ['src/vendored/numpy/datetime/np_datetime.c',
'src/vendored/numpy/datetime/np_datetime_strings.c',
'src/datetime/date_conversions.c',
'src/datetime/pd_datetime.c']},
'pandas_parser': {'sources': ['src/parser/tokenizer.c',
'src/parser/io.c',
'pd_parser.c'],
'include_dirs': [klib_include]},
'src/parser/pd_parser.c']},
'parsers': {'sources': ['parsers.pyx', 'src/parser/tokenizer.c', 'src/parser/io.c'],
'include_dirs': [klib_include, 'src'],
'deps': _khash_primitive_helper_dep},
'json': {'sources': ['src/ujson/python/ujson.c',
'src/ujson/python/objToJSON.c',
'src/ujson/python/JSONtoObj.c',
'src/ujson/lib/ultrajsonenc.c',
'src/ujson/lib/ultrajsondec.c'],
'include_dirs': ['tslibs/src/datetime', 'src/ujson/lib', 'src/ujson/python']},
'json': {'sources': ['src/vendored/ujson/python/ujson.c',
'src/vendored/ujson/python/objToJSON.c',
'src/vendored/ujson/python/JSONtoObj.c',
'src/vendored/ujson/lib/ultrajsonenc.c',
'src/vendored/ujson/lib/ultrajsondec.c']},
'ops': {'sources': ['ops.pyx']},
'ops_dispatch': {'sources': ['ops_dispatch.pyx']},
'properties': {'sources': ['properties.pyx']},
'reshape': {'sources': ['reshape.pyx']},
'sas': {'sources': ['sas.pyx']},
'byteswap': {'sources': ['byteswap.pyx']},
'sparse': {'sources': ['sparse.pyx', _sparse_op_helper]},
'tslib': {'sources': ['tslib.pyx'],
'include_dirs': inc_datetime},
'tslib': {'sources': ['tslib.pyx']},
'testing': {'sources': ['testing.pyx']},
'writers': {'sources': ['writers.pyx']}
}
Expand All @@ -118,7 +106,7 @@ foreach ext_name, ext_dict : libs_sources
ext_name,
ext_dict.get('sources'),
cython_args: ['--include-dir', meson.current_build_dir()],
include_directories: [inc_np] + ext_dict.get('include_dirs', ''),
include_directories: [inc_np, inc_pd],
dependencies: ext_dict.get('deps', ''),
subdir: 'pandas/_libs',
install: true
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ cdef:
int64_t DEFAULT_CHUNKSIZE = 256 * 1024


cdef extern from "headers/portable.h":
cdef extern from "pandas/portable.h":
# I *think* this is here so that strcasecmp is defined on Windows
# so we don't get
# `parsers.obj : error LNK2001: unresolved external symbol strcasecmp`
Expand All @@ -127,7 +127,7 @@ cdef extern from "headers/portable.h":
pass


cdef extern from "parser/tokenizer.h":
cdef extern from "pandas/parser/tokenizer.h":

ctypedef enum ParserState:
START_RECORD
Expand Down Expand Up @@ -245,7 +245,7 @@ cdef extern from "parser/tokenizer.h":

void COLITER_NEXT(coliter_t, const char *) nogil

cdef extern from "pd_parser.h":
cdef extern from "pandas/parser/pd_parser.h":
void *new_rd_source(object obj) except NULL

int del_rd_source(void *src)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ The full license is in the LICENSE file, distributed with this software.
// Conversion routines that are useful for serialization,
// but which don't interact with JSON objects directly

#include "date_conversions.h"
#include "np_datetime.h"
#include "np_datetime_strings.h"
#include "pandas/datetime/date_conversions.h"
#include "pandas/vendored/numpy/datetime/np_datetime.h"
#include "pandas/vendored/numpy/datetime/np_datetime_strings.h"

/*
* Function: scaleNanosecToUnit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#include <Python.h>

#include "datetime.h"
#include "pd_datetime.h"
#include "pandas/datetime/pd_datetime.h"


static void pandas_datetime_destructor(PyObject *op) {
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/parser/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Distributed under the terms of the BSD Simplified License.
The full license is in the LICENSE file, distributed with this software.
*/

#include "io.h"
#include "pandas/parser/io.h"

/*
On-disk FILE, uncompressed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Distributed under the terms of the BSD Simplified License.
*/
#define _PANDAS_PARSER_IMPL

#include "pd_parser.h"
#include "src/parser/io.h"
#include "pandas/parser/pd_parser.h"
#include "pandas/parser/io.h"

static int to_double(char *item, double *p_value, char sci, char decimal,
int *maybe_int) {
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ GitHub. See Python Software Foundation License and BSD licenses for these.

*/

#include "tokenizer.h"
#include "pandas/parser/tokenizer.h"

#include <ctype.h>
#include <float.h>
#include <math.h>

#include "../headers/portable.h"
#include "pandas/portable.h"

void coliter_setup(coliter_t *self, parser_t *parser, int64_t i,
int64_t start) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>
#include "np_datetime.h"
#include "pandas/vendored/numpy/datetime/np_datetime.h"


const int days_per_month_table[2][12] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ This file implements string parsing and creation for NumPy datetime.
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>

#include "np_datetime.h"
#include "np_datetime_strings.h"
#include "pandas/vendored/numpy/datetime/np_datetime.h"
#include "pandas/vendored/numpy/datetime/np_datetime_strings.h"


/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Numeric decoder derived from TCL library
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "ultrajson.h"
#include "pandas/vendored/ujson/lib/ultrajson.h"

#ifndef TRUE
#define TRUE 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Numeric decoder derived from TCL library
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ultrajson.h"
#include "pandas/vendored/ujson/lib/ultrajson.h"

#ifndef TRUE
#define TRUE 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Numeric decoder derived from TCL library
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <numpy/arrayobject.h>
#include <ultrajson.h>
#include "pandas/vendored/ujson/lib/ultrajson.h"

#define PRINTMARK()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ Numeric decoder derived from TCL library
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>
#include <numpy/npy_math.h>
#include <ultrajson.h>
#include "pandas/vendored/ujson/lib/ultrajson.h"
#include "datetime.h"
#include "pd_datetime.h"
#include "pandas/datetime/pd_datetime.h"

npy_int64 get_nat(void) { return NPY_MIN_INT64; }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Numeric decoder derived from TCL library
* Copyright (c) 1994 Sun Microsystems, Inc.
*/

#include "version.h"
#include "pandas/vendored/ujson/python/version.h"
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY
Expand Down
Loading