Skip to content

Commit 358056b

Browse files
jbrockmendel authored and No-Stream committed
Move normalization funcs up to conversion (pandas-dev#18086)
closes pandas-dev#17944
1 parent 6f988d7 commit 358056b

File tree

9 files changed

+264
-213
lines changed

9 files changed

+264
-213
lines changed

pandas/_libs/groupby.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# cython: profile=False
23

34
cimport numpy as cnp

pandas/_libs/tslib.pyx

+2-147
Original file line number | Diff line number | Diff line change
@@ -97,9 +97,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject,
9797
convert_to_tsobject,
9898
convert_datetime_to_tsobject,
9999
get_datetime64_nanos)
100-
from tslibs.conversion import (
101-
tz_localize_to_utc, tz_convert,
102-
tz_convert_single)
100+
from tslibs.conversion import (tz_localize_to_utc,
101+
tz_convert_single, date_normalize)
103102

104103
from tslibs.nattype import NaT, nat_strings
105104
from tslibs.nattype cimport _checknull_with_nat
@@ -1849,26 +1848,6 @@ cdef inline _to_i8(object val):
18491848
return val
18501849

18511850

1852-
cpdef pydt_to_i8(object pydt):
1853-
"""
1854-
Convert to int64 representation compatible with numpy datetime64; converts
1855-
to UTC
1856-
"""
1857-
cdef:
1858-
_TSObject ts
1859-
1860-
ts = convert_to_tsobject(pydt, None, None, 0, 0)
1861-
1862-
return ts.value
1863-
1864-
1865-
def i8_to_pydt(int64_t i8, object tzinfo=None):
1866-
"""
1867-
Inverse of pydt_to_i8
1868-
"""
1869-
return Timestamp(i8)
1870-
1871-
18721851
# ----------------------------------------------------------------------
18731852
# Accessors
18741853

@@ -1892,130 +1871,6 @@ def get_time_micros(ndarray[int64_t] dtindex):
18921871
return micros
18931872

18941873

1895-
cdef int64_t DAY_NS = 86400000000000LL
1896-
1897-
1898-
@cython.wraparound(False)
1899-
@cython.boundscheck(False)
1900-
def date_normalize(ndarray[int64_t] stamps, tz=None):
1901-
cdef:
1902-
Py_ssize_t i, n = len(stamps)
1903-
pandas_datetimestruct dts
1904-
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
1905-
1906-
if tz is not None:
1907-
tz = maybe_get_tz(tz)
1908-
result = _normalize_local(stamps, tz)
1909-
else:
1910-
with nogil:
1911-
for i in range(n):
1912-
if stamps[i] == NPY_NAT:
1913-
result[i] = NPY_NAT
1914-
continue
1915-
dt64_to_dtstruct(stamps[i], &dts)
1916-
result[i] = _normalized_stamp(&dts)
1917-
1918-
return result
1919-
1920-
1921-
@cython.wraparound(False)
1922-
@cython.boundscheck(False)
1923-
cdef _normalize_local(ndarray[int64_t] stamps, object tz):
1924-
cdef:
1925-
Py_ssize_t n = len(stamps)
1926-
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
1927-
ndarray[int64_t] trans, deltas, pos
1928-
pandas_datetimestruct dts
1929-
1930-
if is_utc(tz):
1931-
with nogil:
1932-
for i in range(n):
1933-
if stamps[i] == NPY_NAT:
1934-
result[i] = NPY_NAT
1935-
continue
1936-
dt64_to_dtstruct(stamps[i], &dts)
1937-
result[i] = _normalized_stamp(&dts)
1938-
elif is_tzlocal(tz):
1939-
for i in range(n):
1940-
if stamps[i] == NPY_NAT:
1941-
result[i] = NPY_NAT
1942-
continue
1943-
dt64_to_dtstruct(stamps[i], &dts)
1944-
dt = datetime(dts.year, dts.month, dts.day, dts.hour,
1945-
dts.min, dts.sec, dts.us, tz)
1946-
delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
1947-
dt64_to_dtstruct(stamps[i] + delta, &dts)
1948-
result[i] = _normalized_stamp(&dts)
1949-
else:
1950-
# Adjust datetime64 timestamp, recompute datetimestruct
1951-
trans, deltas, typ = get_dst_info(tz)
1952-
1953-
_pos = trans.searchsorted(stamps, side='right') - 1
1954-
if _pos.dtype != np.int64:
1955-
_pos = _pos.astype(np.int64)
1956-
pos = _pos
1957-
1958-
# statictzinfo
1959-
if typ not in ['pytz', 'dateutil']:
1960-
for i in range(n):
1961-
if stamps[i] == NPY_NAT:
1962-
result[i] = NPY_NAT
1963-
continue
1964-
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
1965-
result[i] = _normalized_stamp(&dts)
1966-
else:
1967-
for i in range(n):
1968-
if stamps[i] == NPY_NAT:
1969-
result[i] = NPY_NAT
1970-
continue
1971-
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
1972-
result[i] = _normalized_stamp(&dts)
1973-
1974-
return result
1975-
1976-
cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil:
1977-
dts.hour = 0
1978-
dts.min = 0
1979-
dts.sec = 0
1980-
dts.us = 0
1981-
dts.ps = 0
1982-
return dtstruct_to_dt64(dts)
1983-
1984-
1985-
def dates_normalized(ndarray[int64_t] stamps, tz=None):
1986-
cdef:
1987-
Py_ssize_t i, n = len(stamps)
1988-
ndarray[int64_t] trans, deltas
1989-
pandas_datetimestruct dts
1990-
1991-
if tz is None or is_utc(tz):
1992-
for i in range(n):
1993-
dt64_to_dtstruct(stamps[i], &dts)
1994-
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1995-
return False
1996-
elif is_tzlocal(tz):
1997-
for i in range(n):
1998-
dt64_to_dtstruct(stamps[i], &dts)
1999-
dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
2000-
dts.sec, dts.us, tz)
2001-
dt = dt + tz.utcoffset(dt)
2002-
if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0:
2003-
return False
2004-
else:
2005-
trans, deltas, typ = get_dst_info(tz)
2006-
2007-
for i in range(n):
2008-
# Adjust datetime64 timestamp, recompute datetimestruct
2009-
pos = trans.searchsorted(stamps[i]) - 1
2010-
inf = tz._transition_info[pos]
2011-
2012-
dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
2013-
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
2014-
return False
2015-
2016-
return True
2017-
2018-
20191874
# ----------------------------------------------------------------------
20201875
# Some general helper functions
20211876

pandas/_libs/tslibs/conversion.pxd

+2
Original file line number | Diff line number | Diff line change
@@ -26,3 +26,5 @@ cdef void _localize_tso(_TSObject obj, object tz)
2626
cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
2727

2828
cdef int64_t get_datetime64_nanos(object val) except? -1
29+
30+
cpdef int64_t pydt_to_i8(object pydt) except? -1

0 commit comments

Comments
 (0)