-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: ISO8601-compliant datetime string conversion in iterrows()
and Series construction.
#19762
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
518ab47
156adbb
6d06cf1
f2617dd
09ae4e5
7ea24ec
fac665b
068fde2
8ceeb62
d105732
26fd14f
ab5214a
37aa8dd
7d9b27d
389a9d9
959ae62
700fa38
f8159c2
3708f4b
cb798d2
2e27f22
75268a8
8384d5e
1665922
21f7c15
5dc7a37
f9240b5
6e67070
9e6e2a7
27fdfac
6aea33d
998920c
14946f8
9e11b43
2fe7057
910f759
0b72b72
acdec06
e69f4ab
5b12cfc
a9d85ae
a5a1f57
793ea23
08d2718
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -710,10 +710,16 @@ def iteritems(self): | |
for i, k in enumerate(self.columns): | ||
yield k, self._ixs(i, axis=1) | ||
|
||
def iterrows(self): | ||
def iterrows(self, require_iso8601=False): | ||
""" | ||
Iterate over DataFrame rows as (index, Series) pairs. | ||
|
||
Parameters | ||
---------- | ||
require_iso8601 : boolean, default False | ||
If True, only try to infer ISO8601-compliant datetime string in | ||
iterated rows. | ||
|
||
Notes | ||
----- | ||
|
||
|
@@ -755,7 +761,10 @@ def iterrows(self): | |
columns = self.columns | ||
klass = self._constructor_sliced | ||
for k, v in zip(self.index, self.values): | ||
s = klass(v, index=columns, name=k) | ||
s = klass(v, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't need to add this here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reverted. |
||
index=columns, | ||
name=k, | ||
require_iso8601=require_iso8601) | ||
yield k, s | ||
|
||
def itertuples(self, index=True, name="Pandas"): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,7 +146,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): | |
'from_csv', 'valid']) | ||
|
||
def __init__(self, data=None, index=None, dtype=None, name=None, | ||
copy=False, fastpath=False): | ||
copy=False, fastpath=False, require_iso8601=False): | ||
|
||
# we are called internally, so short-circuit | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't need this anywhere here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reverted. |
||
if fastpath: | ||
|
@@ -236,7 +236,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None, | |
data = data.copy() | ||
else: | ||
data = _sanitize_array(data, index, dtype, copy, | ||
raise_cast_failure=True) | ||
raise_cast_failure=True, | ||
require_iso8601=require_iso8601) | ||
|
||
data = SingleBlockManager(data, index, fastpath=True) | ||
|
||
|
@@ -3129,7 +3130,7 @@ def _sanitize_index(data, index, copy=False): | |
|
||
|
||
def _sanitize_array(data, index, dtype=None, copy=False, | ||
raise_cast_failure=False): | ||
raise_cast_failure=False, require_iso8601=False): | ||
""" sanitize input data to an ndarray, copy if specified, coerce to the | ||
dtype if specified | ||
""" | ||
|
@@ -3145,15 +3146,17 @@ def _sanitize_array(data, index, dtype=None, copy=False, | |
else: | ||
data = data.copy() | ||
|
||
def _try_cast(arr, take_fast_path): | ||
def _try_cast(arr, take_fast_path, require_iso8601=require_iso8601): | ||
|
||
# perf shortcut as this is the most common case | ||
if take_fast_path: | ||
if maybe_castable(arr) and not copy and dtype is None: | ||
return arr | ||
|
||
try: | ||
subarr = maybe_cast_to_datetime(arr, dtype) | ||
subarr = maybe_cast_to_datetime(arr, | ||
dtype, | ||
require_iso8601=require_iso8601) | ||
if not is_extension_type(subarr): | ||
subarr = np.array(subarr, dtype=dtype, copy=copy) | ||
except (ValueError, TypeError): | ||
|
@@ -3211,7 +3214,9 @@ def _try_cast(arr, take_fast_path): | |
else: | ||
subarr = maybe_convert_platform(data) | ||
|
||
subarr = maybe_cast_to_datetime(subarr, dtype) | ||
subarr = maybe_cast_to_datetime(subarr, | ||
dtype, | ||
require_iso8601=require_iso8601) | ||
|
||
elif isinstance(data, range): | ||
# GH 16804 | ||
|
@@ -3233,7 +3238,9 @@ def _try_cast(arr, take_fast_path): | |
dtype, value = infer_dtype_from_scalar(value) | ||
else: | ||
# need to possibly convert the value here | ||
value = maybe_cast_to_datetime(value, dtype) | ||
value = maybe_cast_to_datetime(subarr, | ||
dtype, | ||
require_iso8601=require_iso8601) | ||
|
||
subarr = construct_1d_arraylike_from_scalar( | ||
value, len(index), dtype) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,8 +105,8 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): | |
|
||
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | ||
utc=None, box=True, format=None, exact=True, | ||
unit=None, infer_datetime_format=False, origin='unix', | ||
cache=False): | ||
unit=None, infer_datetime_format=False, require_iso8601=False, | ||
origin='unix', cache=False): | ||
""" | ||
Convert argument to datetime. | ||
|
||
|
@@ -167,6 +167,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | |
datetime strings, and if it can be inferred, switch to a faster | ||
method of parsing them. In some cases this can increase the parsing | ||
speed by ~5-10x. | ||
require_iso8601 : boolean, default False | ||
If True, only try to infer ISO8601-compliant datetime string. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a versionadded tag (0.23.0). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. string -> strings. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
origin : scalar, default is 'unix' | ||
Define the reference date. The numeric values would be parsed as number | ||
of units (defined by `unit`) since this reference date. | ||
|
@@ -273,7 +275,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | |
|
||
tz = 'utc' if utc else None | ||
|
||
def _convert_listlike(arg, box, format, name=None, tz=tz): | ||
def _convert_listlike(arg, box, format, name=None, tz=tz, | ||
require_iso8601=require_iso8601): | ||
|
||
if isinstance(arg, (list, tuple)): | ||
arg = np.array(arg, dtype='O') | ||
|
@@ -313,11 +316,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): | |
'1-d array, or Series') | ||
|
||
arg = _ensure_object(arg) | ||
require_iso8601 = False | ||
|
||
if infer_datetime_format and format is None: | ||
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) | ||
|
||
if format is not None: | ||
# There is a special fast-path for iso8601 formatted | ||
# datetime strings, so in those cases don't use the inferred | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need to add this require_iso8601 anywhere here, except in the actual to_datetime() call, where it should be True.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.