From 12d0b4db65fb681b7aca30209711ceb997f3d4b7 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Tue, 19 May 2020 23:24:10 +0000 Subject: [PATCH 01/37] TST: GH28813 test .diff() on Sparse dtype --- pandas/tests/frame/methods/test_diff.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 6a9248e1cba1e..4ab0e80b73d90 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -36,6 +36,14 @@ def test_diff(self, datetime_frame): ).astype("float64") tm.assert_frame_equal(result, expected) + # Result should be the same for sparse df, see GH28813 + arr = [[0, 1], [1, 0]] + normal = pd.DataFrame(arr) + sparse = pd.DataFrame(arr, dtype='Sparse[int]') + # we don't check dtype because one is sparse and the other isn't + tm.assert_frame_equal(normal.diff(), sparse.diff(), check_dtype=False) + + @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_axis0(self, tz): # GH#18578 From 02c4a85908d8cb6906feeb539be5b694e746a1a4 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Wed, 20 May 2020 01:02:42 +0000 Subject: [PATCH 02/37] TST: GH28813 test .diff() on Sparse dtype --- pandas/tests/frame/methods/test_diff.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 4ab0e80b73d90..2640e2192d798 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -39,11 +39,10 @@ def test_diff(self, datetime_frame): # Result should be the same for sparse df, see GH28813 arr = [[0, 1], [1, 0]] normal = pd.DataFrame(arr) - sparse = pd.DataFrame(arr, dtype='Sparse[int]') + sparse = pd.DataFrame(arr, dtype="Sparse[int]") # we don't check dtype because one is sparse and the other isn't tm.assert_frame_equal(normal.diff(), sparse.diff(), check_dtype=False) - @pytest.mark.parametrize("tz", [None, "UTC"]) 
def test_diff_datetime_axis0(self, tz): # GH#18578 From 7e3256ba22eb4957c35ff582282029f0904332b7 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Wed, 20 May 2020 16:38:12 +0000 Subject: [PATCH 03/37] TST: GH28813 pull sparse diff() test into its own function --- pandas/tests/frame/methods/test_diff.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 2640e2192d798..e876e40aa2eb1 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -36,13 +36,6 @@ def test_diff(self, datetime_frame): ).astype("float64") tm.assert_frame_equal(result, expected) - # Result should be the same for sparse df, see GH28813 - arr = [[0, 1], [1, 0]] - normal = pd.DataFrame(arr) - sparse = pd.DataFrame(arr, dtype="Sparse[int]") - # we don't check dtype because one is sparse and the other isn't - tm.assert_frame_equal(normal.diff(), sparse.diff(), check_dtype=False) - @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_axis0(self, tz): # GH#18578 @@ -165,3 +158,14 @@ def test_diff_axis1_mixed_dtypes_negative_periods(self): result = df.diff(axis=1, periods=-1) tm.assert_frame_equal(result, expected) + + def test_diff_sparse(self): + # GH#28813 .diff() should work for sparse dataframes as well + sparse_df = pd.DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]") + + result = sparse_df.diff() + expected = pd.DataFrame( + [[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0) + ) + + tm.assert_frame_equal(result, expected) From 26a920c28f51598697a3ae970b23da02ef83cbab Mon Sep 17 00:00:00 2001 From: matteosantama Date: Fri, 22 May 2020 15:28:59 +0000 Subject: [PATCH 04/37] BUG: GH29461 display nanoseconds with strftime() --- pandas/_libs/tslibs/timestamps.pyx | 25 +++++++++++++++++++ .../tests/scalar/timestamp/test_timestamp.py | 14 ++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff 
--git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 12d44413e1350..150ce0868696c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -7,6 +7,7 @@ construction requirements, we need to do object instantiation in python shadows the python class, where we do any heavy lifting. """ import warnings +import time as _time import numpy as np cimport numpy as cnp @@ -1469,6 +1470,30 @@ default 'raise' np.array([self.value], dtype='i8'), tz=self.tz)[0] return Timestamp(normalized_value).tz_localize(self.tz) + def strftime(self, format: str) -> str: + """ + Override datetime.strftime() method so we can display + nanosecond precision. + """ + freplace = None # the string to use for %f + + newformat = [] + i, n = 0, len(format) + while i < n: + ch = format[i] + if ch == 'f': + # remove accompanying % + newformat.pop() + # and put fractional seconds in its place + newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") + else: + newformat.append(ch) + + i += 1 + + newformat = "".join(newformat) + return _time.strftime(newformat, self.timetuple()) + # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index cee7ac450e411..36a0847ef60b7 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -15,7 +15,7 @@ from pandas.compat.numpy import np_datetime64_compat import pandas.util._test_decorators as td -from pandas import NaT, Timedelta, Timestamp +from pandas import NaT, Timedelta, Timestamp, to_datetime import pandas._testing as tm from pandas.tseries import offsets @@ -442,6 +442,18 @@ def test_nanosecond_timestamp(self): assert t.value == expected assert t.nanosecond == 10 + @pytest.mark.parametrize("date", [ + "2020-05-22 08:53:19.123456789", + "2020-05-22 08:53:19.123456", + 
"2020-05-22 08:53:19" + ]) + @pytest.mark.parametrize("fmt", [ + "%m/%d/%Y %H:%M:%S.%f", "%m%d%Y%H%M%S%f" + ]) + def test_nanosecond_roundtrip(self, date, fmt): + ts = Timestamp(date) + string = ts.strftime(fmt) + assert ts == to_datetime(string, format=fmt) class TestTimestampToJulianDate: def test_compare_1700(self): From 3a529ac6e94504fc55964ff92a04954cdd358fcd Mon Sep 17 00:00:00 2001 From: matteosantama Date: Fri, 22 May 2020 16:12:24 +0000 Subject: [PATCH 05/37] BUG: GH29461 don't display nanoseconds in strftime if none exists for compatability with datetime.strptime() --- pandas/_libs/tslibs/timestamps.pyx | 11 +++++------ pandas/tests/scalar/timestamp/test_timestamp.py | 9 +++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 150ce0868696c..f6caaba9c90ee 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1471,11 +1471,9 @@ default 'raise' return Timestamp(normalized_value).tz_localize(self.tz) def strftime(self, format: str) -> str: - """ - Override datetime.strftime() method so we can display - nanosecond precision. 
- """ - freplace = None # the string to use for %f + # don't do additional processing if its not necessary + if '%f' not in format: + return super().strftime(format) newformat = [] i, n = 0, len(format) @@ -1485,7 +1483,8 @@ default 'raise' # remove accompanying % newformat.pop() # and put fractional seconds in its place - newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") + ns = f"{self.nanosecond:03}" if self.nanosecond else "" + newformat.append(f"{self.microsecond:06}{ns}") else: newformat.append(ch) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 36a0847ef60b7..421cafec81fa4 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -381,6 +381,15 @@ def test_tz_conversion_freq(self, tz_naive_fixture): t2 = Timestamp("2019-01-02 12:00", tz="UTC", freq="T") assert t2.tz_convert(tz="UTC").freq == t2.freq + @pytest.mark.parametrize("_input,fmt,_output", [ + ("2020-05-22 11:07:30", "%Y-%m-%d", "2020-05-22"), + ("2020-05-22 11:07:30.123456", "%Y-%m-%d %f", "2020-05-22 123456"), + ("2020-05-22 11:07:30.123456789", "%f", "123456789"), + ]) + def test_strftime(self, _input, fmt, _output): + ts = Timestamp(_input) + result = ts.strftime(fmt) + assert result == _output class TestTimestampNsOperations: def test_nanosecond_string_parsing(self): From d0124aa228c4f56047917d0a618dc9b3297331a0 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Fri, 22 May 2020 16:17:37 +0000 Subject: [PATCH 06/37] update whatsnew doc --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 19db7dcb4b83e..8a90dc6813963 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -656,6 +656,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` with results not having the correct ``name`` 
attribute (:issue:`33904`) - Bug in :meth:`DatetimeArray.__setitem__`, :meth:`TimedeltaArray.__setitem__`, :meth:`PeriodArray.__setitem__` incorrectly allowing values with ``int64`` dtype to be silently cast (:issue:`33717`) - Bug in subtracting :class:`TimedeltaIndex` from :class:`Period` incorrectly raising ``TypeError`` in some cases where it should succeed and ``IncompatibleFrequency`` in some cases where it should raise ``TypeError`` (:issue:`33883`) +- Bug in :meth:`Timestamp.strftime` did not display full nanosecond precision (:issue:`29461`) Timedelta ^^^^^^^^^ From 0c0aaf24109a0c9bad98efcdd849b181fb8c2cd9 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Fri, 22 May 2020 16:21:31 +0000 Subject: [PATCH 07/37] black formatting --- .../tests/scalar/timestamp/test_timestamp.py | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 421cafec81fa4..f803939d898bf 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -381,16 +381,20 @@ def test_tz_conversion_freq(self, tz_naive_fixture): t2 = Timestamp("2019-01-02 12:00", tz="UTC", freq="T") assert t2.tz_convert(tz="UTC").freq == t2.freq - @pytest.mark.parametrize("_input,fmt,_output", [ - ("2020-05-22 11:07:30", "%Y-%m-%d", "2020-05-22"), - ("2020-05-22 11:07:30.123456", "%Y-%m-%d %f", "2020-05-22 123456"), - ("2020-05-22 11:07:30.123456789", "%f", "123456789"), - ]) + @pytest.mark.parametrize( + "_input,fmt,_output", + [ + ("2020-05-22 11:07:30", "%Y-%m-%d", "2020-05-22"), + ("2020-05-22 11:07:30.123456", "%Y-%m-%d %f", "2020-05-22 123456"), + ("2020-05-22 11:07:30.123456789", "%f", "123456789"), + ], + ) def test_strftime(self, _input, fmt, _output): ts = Timestamp(_input) result = ts.strftime(fmt) assert result == _output + class TestTimestampNsOperations: def test_nanosecond_string_parsing(self): ts = 
Timestamp("2013-05-01 07:15:45.123456789") @@ -451,19 +455,21 @@ def test_nanosecond_timestamp(self): assert t.value == expected assert t.nanosecond == 10 - @pytest.mark.parametrize("date", [ - "2020-05-22 08:53:19.123456789", - "2020-05-22 08:53:19.123456", - "2020-05-22 08:53:19" - ]) - @pytest.mark.parametrize("fmt", [ - "%m/%d/%Y %H:%M:%S.%f", "%m%d%Y%H%M%S%f" - ]) + @pytest.mark.parametrize( + "date", + [ + "2020-05-22 08:53:19.123456789", + "2020-05-22 08:53:19.123456", + "2020-05-22 08:53:19", + ], + ) + @pytest.mark.parametrize("fmt", ["%m/%d/%Y %H:%M:%S.%f", "%m%d%Y%H%M%S%f"]) def test_nanosecond_roundtrip(self, date, fmt): ts = Timestamp(date) string = ts.strftime(fmt) assert ts == to_datetime(string, format=fmt) + class TestTimestampToJulianDate: def test_compare_1700(self): r = Timestamp("1700-06-23").to_julian_date() From abdbe4e4d95cec4d16b2df2c975d4b57ae68a8ee Mon Sep 17 00:00:00 2001 From: matteosantama Date: Fri, 22 May 2020 16:49:36 +0000 Subject: [PATCH 08/37] remove trailing whitespace to conform to CI --- pandas/_libs/tslibs/timestamps.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f6caaba9c90ee..cd694cd080b35 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1474,7 +1474,6 @@ default 'raise' # don't do additional processing if its not necessary if '%f' not in format: return super().strftime(format) - newformat = [] i, n = 0, len(format) while i < n: @@ -1487,13 +1486,10 @@ default 'raise' newformat.append(f"{self.microsecond:06}{ns}") else: newformat.append(ch) - i += 1 - newformat = "".join(newformat) return _time.strftime(newformat, self.timetuple()) - # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # the smallest value we could actually represent is From 97ae6b32b4783606b293fd718517646d9207737f Mon Sep 17 00:00:00 2001 From: matteosantama Date: Fri, 22 May 
2020 17:07:21 +0000 Subject: [PATCH 09/37] still trying to pass linting checks --- pandas/_libs/tslibs/timestamps.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index cd694cd080b35..b5cd36966d8f4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1490,6 +1490,7 @@ default 'raise' newformat = "".join(newformat) return _time.strftime(newformat, self.timetuple()) + # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # the smallest value we could actually represent is From ab0c9d4ca7c52a7ed7d9c72b9f6bc273ff8a0a74 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Sat, 23 May 2020 23:13:06 +0000 Subject: [PATCH 10/37] Add strftime benchmarks --- asv_bench/benchmarks/tslibs/timestamp.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 3ef9b814dd79e..36f8f68690026 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -109,6 +109,17 @@ def time_month_name(self, tz, freq): self.ts.month_name() +class TimestampMethods: + params = ["%Y-%m-%d %H:%M:%S", "%Y-%m%d %H:%M:%S.%f"] + param_names = ["fmt"] + + def setup(self): + self.ts = Timestamp("2020-05-23 18:06:13.123456789") + + def time_strftime(self, fmt): + self.strftime(fmt) + + class TimestampOps: params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()] param_names = ["tz"] From 532b19c0fb2eaece916921070be70a9985c1ca5f Mon Sep 17 00:00:00 2001 From: matteosantama Date: Sat, 23 May 2020 23:14:00 +0000 Subject: [PATCH 11/37] Early exit strftime if no nanoseconds --- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b5cd36966d8f4..2273d905c97f7 100644 --- 
a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1472,7 +1472,7 @@ default 'raise' def strftime(self, format: str) -> str: # don't do additional processing if its not necessary - if '%f' not in format: + if not self.nanosecond or '%f' not in format: return super().strftime(format) newformat = [] i, n = 0, len(format) @@ -1482,8 +1482,7 @@ default 'raise' # remove accompanying % newformat.pop() # and put fractional seconds in its place - ns = f"{self.nanosecond:03}" if self.nanosecond else "" - newformat.append(f"{self.microsecond:06}{ns}") + newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") else: newformat.append(ch) i += 1 From 88fba5e746950b0fdda5ef422de945ab7f6f8946 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Sun, 24 May 2020 03:56:34 +0000 Subject: [PATCH 12/37] Make loop more pythonic --- pandas/_libs/tslibs/timestamps.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2273d905c97f7..f0e4837bb3e33 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1475,9 +1475,7 @@ default 'raise' if not self.nanosecond or '%f' not in format: return super().strftime(format) newformat = [] - i, n = 0, len(format) - while i < n: - ch = format[i] + for ch in format: if ch == 'f': # remove accompanying % newformat.pop() @@ -1485,7 +1483,6 @@ default 'raise' newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") else: newformat.append(ch) - i += 1 newformat = "".join(newformat) return _time.strftime(newformat, self.timetuple()) From 2ac690c350bcdc998c194a0c06a2944779d62042 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Sun, 24 May 2020 04:17:25 +0000 Subject: [PATCH 13/37] Fix benchmark test --- asv_bench/benchmarks/tslibs/timestamp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py 
b/asv_bench/benchmarks/tslibs/timestamp.py index 36f8f68690026..cb714d4491312 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -110,14 +110,14 @@ def time_month_name(self, tz, freq): class TimestampMethods: - params = ["%Y-%m-%d %H:%M:%S", "%Y-%m%d %H:%M:%S.%f"] + params = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"] param_names = ["fmt"] - def setup(self): + def setup(self, fmt): self.ts = Timestamp("2020-05-23 18:06:13.123456789") def time_strftime(self, fmt): - self.strftime(fmt) + self.ts.strftime(fmt) class TimestampOps: From 3b533c914c436bf24a07cee29e85750374b32f1c Mon Sep 17 00:00:00 2001 From: matteosantama Date: Sun, 24 May 2020 04:53:51 +0000 Subject: [PATCH 14/37] Remove whitespace --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f0e4837bb3e33..03130afe3ee59 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1475,7 +1475,7 @@ default 'raise' if not self.nanosecond or '%f' not in format: return super().strftime(format) newformat = [] - for ch in format: + for ch in format: if ch == 'f': # remove accompanying % newformat.pop() From f6ba1c9bbbaa19dd74aa8472aa6831f6637b0d38 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Tue, 26 May 2020 02:48:32 +0000 Subject: [PATCH 15/37] Remove extra function call --- pandas/_libs/tslibs/timestamps.pyx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 03130afe3ee59..100373de69dfd 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1471,19 +1471,19 @@ default 'raise' return Timestamp(normalized_value).tz_localize(self.tz) def strftime(self, format: str) -> str: - # don't do additional processing if its not necessary - if not 
self.nanosecond or '%f' not in format: - return super().strftime(format) - newformat = [] - for ch in format: - if ch == 'f': - # remove accompanying % - newformat.pop() - # and put fractional seconds in its place - newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") - else: - newformat.append(ch) - newformat = "".join(newformat) + newformat = format + # only do additional processing if necessary + if self.nanosecond and '%f' in format: + newformat = [] + for ch in format: + if ch == 'f': + # remove accompanying % + newformat.pop() + # and put fractional seconds in its place + newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") + else: + newformat.append(ch) + newformat = "".join(newformat) return _time.strftime(newformat, self.timetuple()) From 07b27e2ef472c4f3c33936cfc3e6ef65cbe041de Mon Sep 17 00:00:00 2001 From: matteosantama Date: Tue, 26 May 2020 15:56:36 +0000 Subject: [PATCH 16/37] Benchmark series.strftime() --- asv_bench/benchmarks/timeseries.py | 33 +++++++++++++++++++----------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index b494dbd8a38fa..eed90b112d115 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -394,33 +394,42 @@ def time_dup_string_tzoffset_dates(self, cache): class DatetimeAccessor: - params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] - param_names = "tz" - - def setup(self, tz): + params = ( + [None, "US/Eastern", "UTC", dateutil.tz.tzutc()], + ["%Y-%m-%d %H:%M:%S.%f%z", "%Y-%m-%d %H:%M:%S%z"], + ["T", "S", "NS"], + ) + param_names = ["tz", "fmt", "frequency"] + + def setup(self, tz, fmt, frequency): N = 100000 - self.series = Series(date_range(start="1/1/2000", periods=N, freq="T", tz=tz)) + self.series = Series( + date_range(start="1/1/2000", periods=N, freq=frequency, tz=tz) + ) - def time_dt_accessor(self, tz): + def time_dt_accessor(self, *args): self.series.dt - def 
time_dt_accessor_normalize(self, tz): + def time_dt_accessor_normalize(self, *args): self.series.dt.normalize() - def time_dt_accessor_month_name(self, tz): + def time_dt_accessor_month_name(self, *args): self.series.dt.month_name() - def time_dt_accessor_day_name(self, tz): + def time_dt_accessor_day_name(self, *args): self.series.dt.day_name() - def time_dt_accessor_time(self, tz): + def time_dt_accessor_time(self, *args): self.series.dt.time - def time_dt_accessor_date(self, tz): + def time_dt_accessor_date(self, *args): self.series.dt.date - def time_dt_accessor_year(self, tz): + def time_dt_accessor_year(self, *args): self.series.dt.year + def time_dt_accessor_strftime(self, _, fmt, *args): + self.series.dt.strftime(fmt) + from .pandas_vb_common import setup # noqa: F401 isort:skip From d72222a403e13d427e1d62fcfd961e37666f1c3e Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Mon, 8 Jun 2020 17:29:46 -0500 Subject: [PATCH 17/37] Use explicitly named parameters in testing --- asv_bench/benchmarks/timeseries.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index eed90b112d115..aeb465297d774 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -407,28 +407,28 @@ def setup(self, tz, fmt, frequency): date_range(start="1/1/2000", periods=N, freq=frequency, tz=tz) ) - def time_dt_accessor(self, *args): + def time_dt_accessor(self, tz, fmt, frequency): self.series.dt - def time_dt_accessor_normalize(self, *args): + def time_dt_accessor_normalize(self, tz, fmt, frequency): self.series.dt.normalize() - def time_dt_accessor_month_name(self, *args): + def time_dt_accessor_month_name(self, tz, fmt, frequency): self.series.dt.month_name() - def time_dt_accessor_day_name(self, *args): + def time_dt_accessor_day_name(self, tz, fmt, frequency): self.series.dt.day_name() - def time_dt_accessor_time(self, *args): + def 
time_dt_accessor_time(self, tz, fmt, frequency): self.series.dt.time - def time_dt_accessor_date(self, *args): + def time_dt_accessor_date(self, tz, fmt, frequency): self.series.dt.date - def time_dt_accessor_year(self, *args): + def time_dt_accessor_year(self, tz, fmt, frequency): self.series.dt.year - def time_dt_accessor_strftime(self, _, fmt, *args): + def time_dt_accessor_strftime(self, tz, fmt, frequency): self.series.dt.strftime(fmt) From e920d2032aad74427da317b2e17c5d90a0853a0a Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Mon, 8 Jun 2020 17:39:38 -0500 Subject: [PATCH 18/37] Use regex for replacing %f --- pandas/_libs/tslibs/timestamps.pyx | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 85dbfea3eb6cb..8c015460c7154 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1471,19 +1471,9 @@ default 'raise' return Timestamp(normalized_value).tz_localize(self.tz) def strftime(self, format: str) -> str: - newformat = format - # only do additional processing if necessary + # only do nanosecond processing if necessary if self.nanosecond and '%f' in format: - newformat = [] - for ch in format: - if ch == 'f': - # remove accompanying % - newformat.pop() - # and put fractional seconds in its place - newformat.append(f"{self.microsecond * 1000 + self.nanosecond}") - else: - newformat.append(ch) - newformat = "".join(newformat) + newformat = re.sub('%f', f'%f{self.nanosecond}', format) return _time.strftime(newformat, self.timetuple()) From 34db4694f1dcb8fec08f4b3e07d0069aeff9f4d6 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Mon, 8 Jun 2020 17:42:21 -0500 Subject: [PATCH 19/37] Clean up Timestamp._time_repr to use new strftime functionality --- pandas/_libs/tslibs/timestamps.pyx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx 
b/pandas/_libs/tslibs/timestamps.pyx index 8c015460c7154..87e371485a33b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -508,14 +508,7 @@ cdef class _Timestamp(ABCTimestamp): @property def _time_repr(self) -> str: - result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' - - if self.nanosecond != 0: - result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' - elif self.microsecond != 0: - result += f'.{self.microsecond:06d}' - - return result + return self.strftime('%H:%M:%S.%f') @property def _short_repr(self) -> str: From fbe286ead3132afd532659bfaa38d444b7fd308c Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Mon, 8 Jun 2020 18:00:47 -0500 Subject: [PATCH 20/37] Commiting so I can merge master --- pandas/_libs/tslibs/timestamps.pyx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 87e371485a33b..f622f7d61c70e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -8,6 +8,7 @@ shadows the python class, where we do any heavy lifting. 
""" import warnings import time as _time +import re import numpy as np cimport numpy as cnp @@ -508,7 +509,14 @@ cdef class _Timestamp(ABCTimestamp): @property def _time_repr(self) -> str: - return self.strftime('%H:%M:%S.%f') + result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' + + if self.nanosecond != 0: + result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' + elif self.microsecond != 0: + result += f'.{self.microsecond:06d}' + + return result @property def _short_repr(self) -> str: @@ -1466,8 +1474,8 @@ default 'raise' def strftime(self, format: str) -> str: # only do nanosecond processing if necessary if self.nanosecond and '%f' in format: - newformat = re.sub('%f', f'%f{self.nanosecond}', format) - return _time.strftime(newformat, self.timetuple()) + format = re.sub('%f', f'%f{self.nanosecond}', format) + return _time.strftime(format, self.timetuple()) # Add the min and max fields at the class level From 90629c53e3639811c018b9b1f5e194b74354b21e Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Mon, 8 Jun 2020 19:44:40 -0500 Subject: [PATCH 21/37] Use regex for replacing %f --- pandas/_libs/tslibs/timestamps.pyx | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ddf58faf00551..0298e906fa75c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1458,9 +1458,14 @@ default 'raise' return Timestamp(normalized[0]).tz_localize(own_tz) def strftime(self, format: str) -> str: - # only do nanosecond processing if necessary - if self.nanosecond and '%f' in format: - format = re.sub('%f', f'%f{self.nanosecond}', format) + # time.strftime() doesn't support %f so we manually replace it + if '%f' in format: + # always show six digits of microseconds, even if its 0s + replacement = f'{self.microsecond:06d}' + # only show nanoseconds if we have them (for comparison to datetime) + if self.nanosecond: + 
replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' + format = re.sub('%f', replacement, format) return _time.strftime(format, self.timetuple()) From 821dfbb349323ff9215750e0266d15e1fb7b0e62 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Mon, 8 Jun 2020 19:48:48 -0500 Subject: [PATCH 22/37] Clean up _time_repr to use new strftime functionality --- pandas/_libs/tslibs/timestamps.pyx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0298e906fa75c..5558ebcb27ff1 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -495,14 +495,7 @@ cdef class _Timestamp(ABCTimestamp): @property def _time_repr(self) -> str: - result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' - - if self.nanosecond != 0: - result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' - elif self.microsecond != 0: - result += f'.{self.microsecond:06d}' - - return result + return self.strftime('%H:%M:%S.%f') @property def _short_repr(self) -> str: From 16b0f9f9a858a9c0c8c8dac5aea6f8dfc4871822 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 09:09:58 -0500 Subject: [PATCH 23/37] Test for all datetime strftime directives --- .../tests/scalar/timestamp/test_timestamp.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index f803939d898bf..27a053cf3a452 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -394,6 +394,42 @@ def test_strftime(self, _input, fmt, _output): result = ts.strftime(fmt) assert result == _output + @pytest.mark.parametrize( + "fmt", + [ + "%a", + "%A", + "%w", + "%d", + "%b", + "%B", + "%m", + "%y", + "%Y", + "%H", + "%I", + "%p", + "%M", + "%S", + "%f", + "%z", + "%Z", + "%j", + "%U", + "%W", + "%c", + "%x", + 
"%X", + "%G", + "%u", + "%V", + ], + ) + def test_strftime_components(self, fmt): + ts = Timestamp("2020-06-09 09:04:11.123456", tz="UTC") + dt = to_datetime(ts) + assert ts.strftime(fmt) == dt.strftime(fmt) + class TestTimestampNsOperations: def test_nanosecond_string_parsing(self): From b8565f283817cc821edc4fd3e10c00eb8e11ed13 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 09:12:08 -0500 Subject: [PATCH 24/37] Test for all datetime strftime directives --- pandas/tests/scalar/timestamp/test_timestamp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 27a053cf3a452..4804706f5461b 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -428,6 +428,7 @@ def test_strftime(self, _input, fmt, _output): def test_strftime_components(self, fmt): ts = Timestamp("2020-06-09 09:04:11.123456", tz="UTC") dt = to_datetime(ts) + assert isinstance(ts, Timestamp) and isinstance(dt, datetime) assert ts.strftime(fmt) == dt.strftime(fmt) From 6866a3d88b4e389db27136f70cdb60f1b3af677d Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 09:28:01 -0500 Subject: [PATCH 25/37] Test for all datetime strftime directives --- pandas/tests/scalar/timestamp/test_timestamp.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 4804706f5461b..420cf42e999a7 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -426,9 +426,8 @@ def test_strftime(self, _input, fmt, _output): ], ) def test_strftime_components(self, fmt): - ts = Timestamp("2020-06-09 09:04:11.123456", tz="UTC") - dt = to_datetime(ts) - assert isinstance(ts, Timestamp) and isinstance(dt, datetime) + ts = Timestamp("2020-06-09 09:04:11.123456", 
tzinfo=utc) + dt = datetime(2020, 6, 9, 9, 4, 11, 123456, tzinfo=utc) assert ts.strftime(fmt) == dt.strftime(fmt) From a4ddcbb5df833bc9603241f4f15d32fcfada9b35 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 09:49:03 -0500 Subject: [PATCH 26/37] Call super strftime instead of time strftime --- pandas/_libs/tslibs/timestamps.pyx | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5558ebcb27ff1..ff57fbfaca39b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -7,7 +7,6 @@ construction requirements, we need to do object instantiation in python shadows the python class, where we do any heavy lifting. """ import warnings -import time as _time import re import numpy as np @@ -1451,15 +1450,10 @@ default 'raise' return Timestamp(normalized[0]).tz_localize(own_tz) def strftime(self, format: str) -> str: - # time.strftime() doesn't support %f so we manually replace it - if '%f' in format: - # always show six digits of microseconds, even if its 0s - replacement = f'{self.microsecond:06d}' - # only show nanoseconds if we have them (for comparison to datetime) - if self.nanosecond: - replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' + if '%f' in format and self.nanosecond: + replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' format = re.sub('%f', replacement, format) - return _time.strftime(format, self.timetuple()) + return super().strftime(format) # Add the min and max fields at the class level From 7fe0a5ed597ab12b167fa9564641937ed780c980 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 10:23:38 -0500 Subject: [PATCH 27/37] Improve docstring --- pandas/_libs/tslibs/timestamps.pyx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ff57fbfaca39b..69db2ef9ed9b8 
100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1450,6 +1450,24 @@ default 'raise' return Timestamp(normalized[0]).tz_localize(own_tz) def strftime(self, format: str) -> str: + """ + Constructs datetime style `format` string from Timestamp. + + See `datetime`_ module for all available directives. + .. _datetime: https://docs.python.org/3/library/datetime\ +.html#strftime-and-strptime-format-codes + + Parameters + ---------- + format : str + String of formatting directives + + Returns + ------- + str + String representation of Timestamp + + """ if '%f' in format and self.nanosecond: replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' format = re.sub('%f', replacement, format) From ce9ef3db6ed0cc1f41b581a9c5e3a52f1479c8a9 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 10:26:44 -0500 Subject: [PATCH 28/37] Remove whitespace in docstring --- pandas/_libs/tslibs/timestamps.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 69db2ef9ed9b8..ed2b0a5f40663 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1466,7 +1466,6 @@ default 'raise' ------- str String representation of Timestamp - """ if '%f' in format and self.nanosecond: replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' From 7b69abb91d0dc931bebda0bb9457e9448fcb7976 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 11:30:32 -0500 Subject: [PATCH 29/37] Only show fractional seconds if they exist --- pandas/_libs/tslibs/timestamps.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ed2b0a5f40663..c0d88f49262f7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -494,7 +494,10 @@ cdef class _Timestamp(ABCTimestamp): @property def 
_time_repr(self) -> str: - return self.strftime('%H:%M:%S.%f') + fmt = '%H:%M:%S' + if self.microseond or self.nanosecond: + fmt = '%H:%M:%S.%f' + return self.strftime(fmt) @property def _short_repr(self) -> str: @@ -1456,7 +1459,7 @@ default 'raise' See `datetime`_ module for all available directives. .. _datetime: https://docs.python.org/3/library/datetime\ .html#strftime-and-strptime-format-codes - + Parameters ---------- format : str From 858b3fb744d39226fb5260d7fff8da9dd434eca8 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 11:35:43 -0500 Subject: [PATCH 30/37] Docstring not building correctly --- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c0d88f49262f7..69cdc2dacd757 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1456,9 +1456,8 @@ default 'raise' """ Constructs datetime style `format` string from Timestamp. - See `datetime`_ module for all available directives. - .. _datetime: https://docs.python.org/3/library/datetime\ -.html#strftime-and-strptime-format-codes + See `datetime <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_ module for all available directives.
Parameters ---------- From e7b85255130fc1c2c15ac2ab9941b4aacc9ac35e Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 12:08:01 -0500 Subject: [PATCH 31/37] Fixed typo --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 69cdc2dacd757..921a529e9d5f9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -495,7 +495,7 @@ cdef class _Timestamp(ABCTimestamp): @property def _time_repr(self) -> str: fmt = '%H:%M:%S' - if self.microseond or self.nanosecond: + if self.microsecond or self.nanosecond: fmt = '%H:%M:%S.%f' return self.strftime(fmt) From 6a2f3d22176837dfea8306167dff296a335adf0f Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 12:48:48 -0500 Subject: [PATCH 32/37] Check nanoseconds first --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 921a529e9d5f9..69de3e88b55ae 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1469,7 +1469,7 @@ default 'raise' str String representation of Timestamp """ - if '%f' in format and self.nanosecond: + if self.nanosecond and '%f' in format: replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' format = re.sub('%f', replacement, format) return super().strftime(format) From cbb735e6f2f7320312c00a4395d12ad313c424cd Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 9 Jun 2020 16:46:55 -0500 Subject: [PATCH 33/37] Rename testing classes --- asv_bench/benchmarks/timeseries.py | 44 +++++++++++++++--------- asv_bench/benchmarks/tslibs/timestamp.py | 2 +- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index aeb465297d774..301cc42ca3437 100644 --- 
a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -394,40 +394,50 @@ def time_dup_string_tzoffset_dates(self, cache): class DatetimeAccessor: - params = ( - [None, "US/Eastern", "UTC", dateutil.tz.tzutc()], - ["%Y-%m-%d %H:%M:%S.%f%z", "%Y-%m-%d %H:%M:%S%z"], - ["T", "S", "NS"], - ) - param_names = ["tz", "fmt", "frequency"] + params = ["T", "S", "NS"] + param_anmes = ["frequency"] - def setup(self, tz, fmt, frequency): + def setup(self, frequency): N = 100000 - self.series = Series( - date_range(start="1/1/2000", periods=N, freq=frequency, tz=tz) - ) + self.series = Series(date_range(start="1/1/2000", periods=N, freq=frequency)) - def time_dt_accessor(self, tz, fmt, frequency): + def time_dt_accessor(self): self.series.dt - def time_dt_accessor_normalize(self, tz, fmt, frequency): + def time_dt_accessor_normalize(self): self.series.dt.normalize() - def time_dt_accessor_month_name(self, tz, fmt, frequency): + def time_dt_accessor_month_name(self): self.series.dt.month_name() - def time_dt_accessor_day_name(self, tz, fmt, frequency): + def time_dt_accessor_day_name(self): self.series.dt.day_name() - def time_dt_accessor_time(self, tz, fmt, frequency): + def time_dt_accessor_time(self): self.series.dt.time - def time_dt_accessor_date(self, tz, fmt, frequency): + def time_dt_accessor_date(self): self.series.dt.date - def time_dt_accessor_year(self, tz, fmt, frequency): + def time_dt_accessor_year(self): self.series.dt.year + +class DateTimeAccessorStrftime: + + params = ( + [None, "US/Eastern", "UTC", dateutil.tz.tzutc()], + ["%Y-%m-%d %H:%M:%S.%f%z", "%Y-%m-%d %H:%M:%S%z"], + ["T", "S", "NS"], + ) + param_names = ["tz", "fmt", "frequency"] + + def setup(self, tz, fmt, frequency): + N = 100000 + self.series = Series( + date_range(start="1/1/2000", periods=N, freq=frequency, tz=tz) + ) + def time_dt_accessor_strftime(self, tz, fmt, frequency): self.series.dt.strftime(fmt) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py 
b/asv_bench/benchmarks/tslibs/timestamp.py index cb714d4491312..1cdd073f955ef 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -109,7 +109,7 @@ def time_month_name(self, tz, freq): self.ts.month_name() -class TimestampMethods: +class TimestampStrftimeMethod: params = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"] param_names = ["fmt"] From 0c4ef36f89c79cefc0345a626849fadf438c8147 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Wed, 10 Jun 2020 22:19:30 -0500 Subject: [PATCH 34/37] Use string replace instead of re package --- pandas/_libs/tslibs/timestamps.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 69de3e88b55ae..a30a305853dc0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -7,7 +7,6 @@ construction requirements, we need to do object instantiation in python shadows the python class, where we do any heavy lifting. 
""" import warnings -import re import numpy as np cimport numpy as cnp @@ -1471,7 +1470,7 @@ default 'raise' """ if self.nanosecond and '%f' in format: replacement = f'{self.microsecond * 1000 + self.nanosecond:09d}' - format = re.sub('%f', replacement, format) + format = format.replace('%f', replacement) return super().strftime(format) From 62c11260161d74fb496d56a56b313e9fd565a4ef Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Wed, 10 Jun 2020 22:46:10 -0500 Subject: [PATCH 35/37] Fix test parametrization --- asv_bench/benchmarks/timeseries.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 301cc42ca3437..937c61e4dee4c 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -394,32 +394,32 @@ def time_dup_string_tzoffset_dates(self, cache): class DatetimeAccessor: - params = ["T", "S", "NS"] - param_anmes = ["frequency"] + params = [None, "US/Easter", "UTC", dateutil.tz.tzutc()] + param_names = "tz" - def setup(self, frequency): + def setup(self, tz): N = 100000 - self.series = Series(date_range(start="1/1/2000", periods=N, freq=frequency)) + self.series = Series(date_range(start="1/1/2000", periods=N, freq="T", tz=tz)) - def time_dt_accessor(self): + def time_dt_accessor(self, tz): self.series.dt - def time_dt_accessor_normalize(self): + def time_dt_accessor_normalize(self, tz): self.series.dt.normalize() - def time_dt_accessor_month_name(self): + def time_dt_accessor_month_name(self, tz): self.series.dt.month_name() - def time_dt_accessor_day_name(self): + def time_dt_accessor_day_name(self, tz): self.series.dt.day_name() - def time_dt_accessor_time(self): + def time_dt_accessor_time(self, tz): self.series.dt.time - def time_dt_accessor_date(self): + def time_dt_accessor_date(self, tz): self.series.dt.date - def time_dt_accessor_year(self): + def time_dt_accessor_year(self, tz): self.series.dt.year 
From 4468168e88deb74a0571c04fca3547158ff192f1 Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Wed, 10 Jun 2020 22:47:08 -0500 Subject: [PATCH 36/37] Fix test parametrization --- asv_bench/benchmarks/timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 937c61e4dee4c..23d4b05c380ad 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -394,7 +394,7 @@ def time_dup_string_tzoffset_dates(self, cache): class DatetimeAccessor: - params = [None, "US/Easter", "UTC", dateutil.tz.tzutc()] + params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] param_names = "tz" def setup(self, tz): From 4d433f9d2722a79081830e853bf32fa2a17100ce Mon Sep 17 00:00:00 2001 From: Matteo Santamaria Date: Tue, 7 Jul 2020 19:18:13 -0400 Subject: [PATCH 37/37] Update docstring for NaT to match Timestamp --- pandas/_libs/tslibs/nattype.pyx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 264013f928d22..437e2dbefa694 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -452,7 +452,25 @@ class NaTType(_NaT): Function is not implemented. Use pd.to_datetime(). """, ) + strftime = _make_error_func( + "strftime", + """ + Constructs datetime style `format` string from Timestamp. + + See `datetime <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_ module for all available directives. + + Parameters + ---------- + format : str + String of formatting directives + Returns + ------- + str + String representation of Timestamp + """, + ) utcfromtimestamp = _make_error_func( "utcfromtimestamp", """