Skip to content

Separate parsing functions out from tslib #17363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Sep 26, 2017
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b0428ec
Separate parsing functions out from tslib
jbrockmendel Aug 28, 2017
b19a31e
flake8 whitespace fixup
jbrockmendel Aug 28, 2017
fa6def5
Address reviewer comments
jbrockmendel Aug 29, 2017
2cd2ab6
Fix __doc__ cython complaint
jbrockmendel Aug 29, 2017
c765e26
lint fixup
jbrockmendel Aug 30, 2017
b3ca3c0
Reviewer comments; remove cython decorators
jbrockmendel Sep 6, 2017
e36f8be
kludge to move tslibs/parsing to make asv work
jbrockmendel Sep 6, 2017
c52c796
kludge to move tslibs/parsing to make asv work
jbrockmendel Sep 6, 2017
f5259ea
flake8 whitespace fixup
jbrockmendel Sep 7, 2017
d1765ab
Dummy commit to force CI
jbrockmendel Sep 7, 2017
ceac008
Merge branch 'master' of https://github.com/pandas-dev/pandas into ts…
jbrockmendel Sep 8, 2017
e3995be
Whitespace fixup
jbrockmendel Sep 8, 2017
6db3e3c
rebase
jbrockmendel Sep 8, 2017
cfcb002
Merge branch 'master' of https://github.com/pandas-dev/pandas into ts…
jbrockmendel Sep 15, 2017
ff57861
Remove duplicate file
jbrockmendel Sep 15, 2017
62cc7b0
Reviewer comments; import try_parse_xyz directly from tslibs.parsing
jbrockmendel Sep 17, 2017
21046b3
flake8 fixup
jbrockmendel Sep 18, 2017
a5f8eca
Merge branch 'master' of https://github.com/pandas-dev/pandas into ts…
jbrockmendel Sep 23, 2017
364a6b0
keep _DATEUTIL_LEXER_SPLIT private to tslibs.parsing
jbrockmendel Sep 24, 2017
f89d11e
Merge branch 'master' of https://github.com/pandas-dev/pandas into ts…
jbrockmendel Sep 25, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas/_libs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ from tslib cimport (
_get_dst_info,
_nat_scalar_rules)

from tslibs.parsing import parse_time_string, NAT_SENTINEL

from pandas.tseries import offsets
from pandas.core.tools.datetimes import parse_time_string
from pandas.tseries import frequencies

cdef int64_t NPY_NAT = util.get_nat()
Expand Down Expand Up @@ -1178,6 +1179,8 @@ class Period(_Period):
value = str(value)
value = value.upper()
dt, _, reso = parse_time_string(value, freq)
if dt is NAT_SENTINEL:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tslibs.parsing does not have NaT in the namespace, so it returns NAT_SENTINEL in places where it otherwise would return NaT. That should be wrapped in tslib, will update.

ordinal = iNaT

if freq is None:
try:
Expand Down
166 changes: 7 additions & 159 deletions pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ iNaT = util.get_nat()

cdef bint PY2 = sys.version_info[0] == 2

from pandas._libs.tslibs.parsing import (
try_parse_dates,
try_parse_date_and_time,
try_parse_year_month_day,
try_parse_datetime_components)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you actually need to import these here; rather, import them directly where they are used.


from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
INT8_MIN, INT8_MAX, INT16_MIN, INT16_MAX,
INT32_MAX, INT32_MIN, INT64_MAX, INT64_MIN)
Expand Down Expand Up @@ -1383,165 +1390,6 @@ def convert_sql_column(x):
return maybe_convert_objects(x, try_float=1)


def try_parse_dates(ndarray[object] values, parser=None,
                    dayfirst=False, default=None):
    """
    Parse each element of ``values`` into a date-like object.

    Parameters
    ----------
    values : ndarray[object]
        Items to parse. An item equal to ``''`` becomes ``np.nan``.
    parser : callable, optional
        Called with each non-empty item. When omitted,
        ``dateutil.parser.parse`` is used (with ``dayfirst`` and ``default``),
        falling back to ``datetime.strptime(s, '%m/%d/%Y')`` if dateutil
        cannot be imported.
    dayfirst : bool, default False
        Forwarded to dateutil; unused when ``parser`` is given.
    default : datetime, optional
        Forwarded to dateutil. When None, the first day of the current
        month is used (GH2618).

    Returns
    -------
    ndarray[object]
        A new array of parsed items. With the built-in parser, any failure
        makes the function return the input ``values`` object instead.

    Raises
    ------
    Exception
        Whatever a caller-supplied ``parser`` raises is propagated.
    """
    cdef:
        Py_ssize_t i, n
        ndarray[object] result

    n = len(values)
    result = np.empty(n, dtype='O')

    if parser is not None:
        # An explicitly passed parser is not guarded: its errors reach
        # the caller untouched.
        for i in range(n):
            item = values[i]
            result[i] = np.nan if item == '' else parser(item)
        return result

    if default is None:  # GH2618
        now = datetime.now()
        default = datetime(now.year, now.month, 1)

    try:
        from dateutil.parser import parse
    except ImportError:  # pragma: no cover
        def parse_date(s):
            try:
                return datetime.strptime(s, '%m/%d/%Y')
            except Exception:
                return s
    else:
        parse_date = lambda x: parse(x, dayfirst=dayfirst, default=default)

    # EAFP: attempt the whole column, hand the input back on any failure
    try:
        for i in range(n):
            item = values[i]
            result[i] = np.nan if item == '' else parse_date(item)
    except Exception:
        return values

    return result


def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
                            date_parser=None, time_parser=None,
                            dayfirst=False, default=None):
    """
    Combine parallel arrays of dates and times into ``datetime`` objects.

    Each element of ``dates`` and ``times`` is converted with ``str`` and
    parsed; the year/month/day of the parsed date and the hour/minute/second
    of the parsed time are combined into one ``datetime``.

    Parameters
    ----------
    dates : ndarray[object]
    times : ndarray[object]
        Must have the same length as ``dates``.
    date_parser : callable, optional
        Parser for date strings. When omitted, ``dateutil.parser.parse`` is
        used (with ``dayfirst`` and ``default``), falling back to
        ``datetime.strptime(s, '%m/%d/%Y')`` if dateutil cannot be imported.
    time_parser : callable, optional
        Parser for time strings. When omitted, ``dateutil.parser.parse`` is
        used, falling back to ``datetime.strptime(s, '%H:%M:%S')``.
    dayfirst : bool, default False
        Forwarded to dateutil for dates; unused when ``date_parser`` is given.
    default : datetime, optional
        Forwarded to dateutil for dates. When None, the first day of the
        current month is used (GH2618).

    Returns
    -------
    ndarray[object]
        One ``datetime`` per input pair.

    Raises
    ------
    ValueError
        If ``dates`` and ``times`` differ in length.
    """
    cdef:
        Py_ssize_t i, n
        ndarray[object] result

    from datetime import datetime

    n = len(dates)
    if len(times) != n:
        raise ValueError('Length of dates and times must be equal')
    result = np.empty(n, dtype='O')

    if date_parser is None:
        if default is None:  # GH2618
            now = datetime.now()
            default = datetime(now.year, now.month, 1)

        try:
            from dateutil.parser import parse
            parse_date = lambda x: parse(x, dayfirst=dayfirst, default=default)
        except ImportError:  # pragma: no cover
            def parse_date(s):
                try:
                    # strptime must be called on ``datetime``: the previous
                    # ``date.strptime`` is not portable, and its
                    # AttributeError was swallowed by the except below.
                    return datetime.strptime(s, '%m/%d/%Y')
                except Exception:
                    return s
    else:
        parse_date = date_parser

    if time_parser is None:
        try:
            from dateutil.parser import parse
            parse_time = lambda x: parse(x)
        except ImportError:  # pragma: no cover
            def parse_time(s):
                try:
                    # Same portability fix as for dates; only the
                    # hour/minute/second of the result are read below.
                    return datetime.strptime(s, '%H:%M:%S')
                except Exception:
                    return s
    else:
        parse_time = time_parser

    for i in range(n):
        d = parse_date(str(dates[i]))
        t = parse_time(str(times[i]))
        result[i] = datetime(d.year, d.month, d.day,
                             t.hour, t.minute, t.second)

    return result


def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
                             ndarray[object] days):
    """
    Build ``datetime`` objects from parallel year, month and day arrays.

    Parameters
    ----------
    years, months, days : ndarray[object]
        Equal-length arrays; every element is passed through ``int``.

    Returns
    -------
    ndarray[object]
        ``datetime(year, month, day)`` for each position.

    Raises
    ------
    ValueError
        If the three arrays do not all have the same length.
    """
    cdef:
        Py_ssize_t idx, count
        ndarray[object] out

    from datetime import datetime

    count = len(years)
    if not (len(months) == count and len(days) == count):
        raise ValueError('Length of years/months/days must all be equal')

    out = np.empty(count, dtype='O')
    for idx in range(count):
        out[idx] = datetime(int(years[idx]), int(months[idx]),
                            int(days[idx]))

    return out


def try_parse_datetime_components(ndarray[object] years,
                                  ndarray[object] months,
                                  ndarray[object] days,
                                  ndarray[object] hours,
                                  ndarray[object] minutes,
                                  ndarray[object] seconds):
    """
    Build ``datetime`` objects from six parallel component arrays.

    Parameters
    ----------
    years, months, days, hours, minutes : ndarray[object]
        Every element is passed through ``int``.
    seconds : ndarray[object]
        Every element is passed through ``float``; the whole part becomes
        the seconds field and a positive fractional part becomes
        microseconds.

    Returns
    -------
    ndarray[object]
        One ``datetime`` per position.

    Raises
    ------
    ValueError
        If the six arrays do not all have the same length.
    """
    cdef:
        Py_ssize_t idx, count
        ndarray[object] out
        int whole_secs
        double raw_secs
        double frac

    from datetime import datetime

    count = len(years)
    if not (len(months) == count and len(days) == count and
            len(hours) == count and len(minutes) == count and
            len(seconds) == count):
        raise ValueError('Length of all datetime components must be equal')

    out = np.empty(count, dtype='O')

    for idx in range(count):
        raw_secs = float(seconds[idx])
        whole_secs = int(raw_secs)

        # Scale only a positive remainder to microseconds; int() below
        # truncates (does not round) whatever is left after scaling.
        frac = raw_secs - whole_secs
        if frac > 0:
            frac = frac * 1000000

        out[idx] = datetime(int(years[idx]), int(months[idx]),
                            int(days[idx]), int(hours[idx]),
                            int(minutes[idx]), whole_secs, int(frac))

    return out


def sanitize_objects(ndarray[object] values, set na_values,
convert_empty=True):
cdef:
Expand Down
Loading