Skip to content

CLN: cleanup libs cimports, remove is_timestamp #18663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 8, 2017
80 changes: 35 additions & 45 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
# Copyright (c) 2012, Lambda Foundry, Inc.
# See LICENSE for the license
import os
import sys
import time
import warnings

from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE

from libc.stdio cimport fopen, fclose
from libc.stdlib cimport malloc, free
from libc.string cimport strncpy, strlen, strcmp, strcasecmp
cimport libc.stdio as stdio
import warnings

from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE
cimport cython
from cython cimport Py_ssize_t

from cpython cimport (PyObject, PyBytes_FromString,
PyBytes_AsString, PyBytes_Check,
PyUnicode_Check, PyUnicode_AsUTF8String,
PyErr_Occurred, PyErr_Fetch)
from cpython.ref cimport Py_XDECREF
from pandas.errors import (ParserError, DtypeWarning,
EmptyDataError, ParserWarning)

# Import CParserError as alias of ParserError for backwards compatibility.
# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
CParserError = ParserError

cdef extern from "Python.h":
object PyUnicode_FromString(char *v)
Expand All @@ -29,15 +30,24 @@ cdef extern from "Python.h":
cdef extern from "stdlib.h":
void memcpy(void *dst, void *src, size_t n)

cimport cython
cimport numpy as cnp

import numpy as np
cimport numpy as cnp
from numpy cimport ndarray, uint8_t, uint64_t, int64_t
cnp.import_array()

import numpy as np
cimport util
from util cimport UINT64_MAX, INT64_MAX, INT64_MIN
import lib

from khash cimport (
khiter_t,
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
kh_get_str, kh_destroy_str,
kh_float64_t, kh_get_float64, kh_destroy_float64,
kh_put_float64, kh_init_float64,
kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox,
kh_destroy_strbox)

import pandas._libs.lib as lib
import pandas.compat as compat
from pandas.core.dtypes.common import (
is_categorical_dtype, CategoricalDtype,
Expand All @@ -47,46 +57,21 @@ from pandas.core.dtypes.common import (
pandas_dtype)
from pandas.core.categorical import Categorical
from pandas.core.dtypes.concat import union_categoricals

import pandas.io.common as com

import time
import os

cnp.import_array()
from pandas.errors import (ParserError, DtypeWarning,
EmptyDataError, ParserWarning)

from khash cimport (
khiter_t,
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
kh_get_str, kh_destroy_str,
kh_float64_t, kh_get_float64, kh_destroy_float64,
kh_put_float64, kh_init_float64,
kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox,
kh_destroy_strbox)
# Import CParserError as alias of ParserError for backwards compatibility.
# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
CParserError = ParserError

import sys

cdef bint PY3 = (sys.version_info[0] >= 3)

cdef double INF = <double> np.inf
cdef double NEGINF = -INF

cdef extern from "headers/stdint.h":
enum: UINT8_MAX
enum: UINT16_MAX
enum: UINT32_MAX
enum: UINT64_MAX
enum: INT8_MIN
enum: INT8_MAX
enum: INT16_MIN
enum: INT16_MAX
enum: INT32_MAX
enum: INT32_MIN
enum: INT64_MAX
enum: INT64_MIN

cdef extern from "headers/portable.h":
pass

cdef extern from "errno.h":
int errno
Expand All @@ -96,6 +81,11 @@ try:
except NameError:
basestring = str

cdef extern from "src/numpy_helper.h":
object sarr_from_data(cnp.dtype, int length, void* data)
void transfer_object_column(char *dst, char *src, size_t stride,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chris-b1, @gfyoung don't we have an issue to remove support for structured arrays entirely? Is that already deprecated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If not, let's create one (and deprecate in 0.22).

size_t length)

cdef extern from "parser/tokenizer.h":

ctypedef enum ParserState:
Expand Down Expand Up @@ -2360,7 +2350,7 @@ def _to_structured_array(dict columns, object names, object usecols):
# We own the data.
buf = <char*> malloc(length * stride)

recs = util.sarr_from_data(dt, length, buf)
recs = sarr_from_data(dt, length, buf)
assert(recs.flags.owndata)

for i in range(nfields):
Expand All @@ -2385,7 +2375,7 @@ cdef _fill_structured_column(char *dst, char* src, int64_t elsize,
int64_t i

if incref:
util.transfer_object_column(dst, src, stride, length)
transfer_object_column(dst, src, stride, length)
else:
for i in range(length):
memcpy(dst, src, elsize)
Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ cdef extern from "numpy_helper.h":
object char_to_string(char*)
void transfer_object_column(char *dst, char *src, size_t stride,
size_t length)
object sarr_from_data(cnp.dtype, int length, void* data)
object unbox_if_zerodim(object arr)

ctypedef fused numeric:
Expand Down Expand Up @@ -100,8 +99,6 @@ cdef inline set_value_at(ndarray arr, object loc, object value):

set_value_at_unsafe(arr, loc, value)

cdef inline int is_contiguous(ndarray arr):
return cnp.PyArray_CHKFLAGS(arr, cnp.NPY_C_CONTIGUOUS)

cdef inline is_array(object o):
return cnp.PyArray_Check(o)
Expand Down
53 changes: 18 additions & 35 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
# -*- coding: utf-8 -*-
# cython: profile=False
# cython: linetrace=False
# distutils: define_macros=CYTHON_TRACE=0
# distutils: define_macros=CYTHON_TRACE_NOGIL=0

cimport numpy as np
from numpy cimport int64_t, ndarray, float64_t
import numpy as np
np.import_array()


from cpython cimport PyTypeObject, PyFloat_Check

cdef extern from "Python.h":
cdef PyTypeObject *Py_TYPE(object)
from cpython cimport PyFloat_Check

from util cimport (is_integer_object, is_float_object, is_string_object,
is_datetime64_object)

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyDateTime_CheckExact,
PyDateTime_IMPORT,
timedelta, datetime, date)
# import datetime C API
Expand Down Expand Up @@ -48,10 +43,8 @@ UTC = pytz.utc

from tslibs.timedeltas cimport cast_from_unit
from tslibs.timedeltas import Timedelta
from tslibs.timezones cimport (
is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_pytz,
get_dst_info)
from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_pytz, get_dst_info)
from tslibs.conversion cimport (tz_convert_single, _TSObject,
convert_datetime_to_tsobject,
get_datetime64_nanos)
Expand Down Expand Up @@ -205,13 +198,6 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False):
return result


cdef PyTypeObject* ts_type = <PyTypeObject*> Timestamp


cdef inline bint is_timestamp(object o):
return Py_TYPE(o) == ts_type # isinstance(o, Timestamp)


def _test_parse_iso8601(object ts):
"""
TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used
Expand Down Expand Up @@ -334,14 +320,6 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
return result


# const for parsers

_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
_MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)}
_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)}


cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
"""
convert the ndarray according to the unit
Expand All @@ -361,7 +339,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
bint is_ignore = errors=='ignore'
bint is_coerce = errors=='coerce'
bint is_raise = errors=='raise'
bint need_to_iterate=True
bint need_to_iterate = True
ndarray[int64_t] iresult
ndarray[object] oresult

Expand All @@ -384,7 +362,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
mask = iresult == iNaT
iresult[mask] = 0
fvalues = iresult.astype('f8') * m
need_to_iterate=False
need_to_iterate = False
except:
pass

Expand All @@ -395,7 +373,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
or (fvalues > _NS_UPPER_BOUND).any()):
raise OutOfBoundsDatetime(
"cannot convert input with unit '{0}'".format(unit))
result = (iresult *m).astype('M8[ns]')
result = (iresult * m).astype('M8[ns]')
iresult = result.view('i8')
iresult[mask] = iNaT
return result
Expand Down Expand Up @@ -546,7 +524,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
'utc=True')
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
if is_timestamp(val):
if not PyDateTime_CheckExact(val):
# i.e. a Timestamp object
iresult[i] += val.nanosecond
try:
check_dts_bounds(&dts)
Expand Down Expand Up @@ -753,11 +732,15 @@ cpdef normalize_date(object dt):
-------
normalized : datetime.datetime or Timestamp
"""
if is_timestamp(dt):
return dt.replace(hour=0, minute=0, second=0, microsecond=0,
nanosecond=0)
elif PyDateTime_Check(dt):
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
if PyDateTime_Check(dt):
if not PyDateTime_CheckExact(dt):
# i.e. a Timestamp object
return dt.replace(hour=0, minute=0, second=0, microsecond=0,
nanosecond=0)
else:
# regular datetime object
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
# TODO: Make sure DST crossing is handled correctly here
elif PyDate_Check(dt):
return datetime(dt.year, dt.month, dt.day)
else:
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ from util cimport (is_string_object,
is_integer_object, is_float_object)

from timedeltas cimport cast_from_unit
from timezones cimport (
is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_dateutil, treat_tz_as_pytz,
get_utcoffset, get_dst_info, get_timezone, maybe_get_tz)
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_dateutil, treat_tz_as_pytz,
get_utcoffset, get_dst_info,
get_timezone, maybe_get_tz)
from parsing import parse_datetime_string

from nattype import nat_strings, NaT
Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
# cython: profile=False
# cython: linetrace=False
# distutils: define_macros=CYTHON_TRACE=0
# distutils: define_macros=CYTHON_TRACE_NOGIL=0
"""
Functions for accessing attributes of Timestamp/datetime64/datetime-like
objects and arrays
Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
# cython: profile=False
# cython: linetrace=False
# distutils: define_macros=CYTHON_TRACE=0
# distutils: define_macros=CYTHON_TRACE_NOGIL=0
"""
Parsing functions for datetime and datetime-like strings.
"""
Expand Down
14 changes: 6 additions & 8 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,18 @@ np.import_array()

from util cimport is_string_object, get_nat

from pandas._libs.khash cimport (
khiter_t,
kh_destroy_int64, kh_put_int64,
kh_init_int64, kh_int64_t,
kh_resize_int64, kh_get_int64)
from pandas._libs.khash cimport (khiter_t,
kh_destroy_int64, kh_put_int64,
kh_init_int64, kh_int64_t,
kh_resize_int64, kh_get_int64)

from cpython.datetime cimport datetime

from np_datetime cimport (pandas_datetimestruct,
dtstruct_to_dt64, dt64_to_dtstruct)
from frequencies cimport get_freq_code
from timezones cimport (
is_utc, is_tzlocal,
maybe_get_tz, get_dst_info, get_utcoffset)
from timezones cimport (is_utc, is_tzlocal,
maybe_get_tz, get_dst_info, get_utcoffset)
from fields import build_field_sarray
from conversion import tz_convert

Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
# cython: profile=False
# cython: linetrace=False
# distutils: define_macros=CYTHON_TRACE=0
# distutils: define_macros=CYTHON_TRACE_NOGIL=0

cimport cython
from cython cimport Py_ssize_t
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/period/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import pandas.util.testing as tm
import pandas.core.indexes.period as period
from pandas.compat import lrange
from pandas.tseries.frequencies import get_freq, MONTHS
from pandas.tseries.frequencies import get_freq
from pandas._libs.tslibs.resolution import _MONTHS as MONTHS
from pandas._libs.tslibs.period import period_ordinal, period_asfreq
from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, Series,
date_range, to_datetime, period_range)
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
from pandas.core.base import SpecificationError, AbstractMethodError
from pandas.errors import UnsupportedFunctionCall
from pandas.core.groupby import DataError
from pandas._libs.tslibs.resolution import DAYS
from pandas.tseries.frequencies import MONTHS
from pandas._libs.tslibs.resolution import DAYS, _MONTHS as MONTHS
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should fix this naming mismatch in resolution (add to list)

from pandas.tseries.frequencies import to_offset
from pandas.core.indexes.datetimes import date_range
from pandas.tseries.offsets import Minute, BDay
Expand Down
Loading