From df65e9725be245c942fbfb03e31f30a33c819f60 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Oct 2023 19:37:39 -0700 Subject: [PATCH 1/5] REF: move get_format_datetime64_from_values to DatetimeArray --- pandas/core/arrays/datetimes.py | 10 +++++++--- pandas/io/formats/format.py | 16 ---------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b520f9f4a6deb..0e1d70a781f02 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -735,12 +735,16 @@ def astype(self, dtype, copy: bool = True): def _format_native_types( self, *, na_rep: str | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: - from pandas.io.formats.format import get_format_datetime64_from_values + from pandas.io.formats.format import is_dates_only - fmt = get_format_datetime64_from_values(self, date_format) + if date_format is None: + ido = is_dates_only(self) + if ido: + # Only dates and no timezone: provide a default format + date_format = "%Y-%m-%d" return tslib.format_array_from_datetime( - self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._creso + self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso ) # ----------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 922d0f37bee3a..374f86b21172c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1812,22 +1812,6 @@ def get_format_datetime64( return lambda x: _format_datetime64(x, nat_rep=nat_rep) -def get_format_datetime64_from_values( - values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None -) -> str | None: - """given values and a date_format, return a string format""" - if isinstance(values, np.ndarray) and values.ndim > 1: - # We don't actually care about the order of values, and DatetimeIndex - # only accepts 1D values - values = values.ravel() - - ido = is_dates_only(values) - if ido: - # Only dates and no timezone: provide a default format - return date_format or "%Y-%m-%d" - return date_format - - class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self) -> list[str]: """we by definition have a TZ""" From 2833660f5fd083dbf7648abbaea14ad5188cd4cd Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Oct 2023 20:45:16 -0700 Subject: [PATCH 2/5] REF: make is_dates_only a DatetimeArray method --- pandas/core/arrays/datetimes.py | 25 ++++++++++++++++--- pandas/core/indexes/datetimes.py | 11 ++------- pandas/io/formats/format.py | 33 +++----------------------- pandas/tests/io/formats/test_format.py | 10 +++++--- 4 files changed, 34 insertions(+), 45 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0e1d70a781f02..7b71866cb8604 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -28,12 +28,14 @@ get_resolution, get_supported_reso, get_unit_from_dtype, + iNaT, ints_to_pydatetime, is_date_array_normalized, is_supported_unit, is_unitless, normalize_i8_timestamps, npy_unit_to_abbrev, + periods_per_day, timezones, to_offset, tz_convert_from_utc, @@ -735,10 +737,8 @@ def astype(self, dtype, copy: bool = True): def _format_native_types( self, *, na_rep: str | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: - from pandas.io.formats.format import is_dates_only - if date_format is None: - ido = is_dates_only(self) + ido = self._is_dates_only if ido: # Only dates and no timezone: provide a default format date_format = "%Y-%m-%d" @@ -747,6 +747,25 @@ def _format_native_types( self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso ) + @property + def _is_dates_only(self) -> bool: + """ + Check if we are round times at midnight (and no timezone), which will + be given a more compact __repr__ than other cases. + """ + if self.tz is not None: + return False + + values_int = self.asi8 + consider_values = values_int != iNaT + dtype = cast(np.dtype, self.dtype) # since we checked tz above + reso = get_unit_from_dtype(dtype) + ppd = periods_per_day(reso) + + # TODO: can we reuse is_date_array_normalized? would need a skipna kwd + even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 + return even_days + # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ae0feba1f9bcf..c00cebd46719b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -393,19 +393,12 @@ def _is_dates_only(self) -> bool: ------- bool """ - - from pandas.io.formats.format import is_dates_only - delta = getattr(self.freq, "delta", None) if delta and delta % dt.timedelta(days=1) != dt.timedelta(days=0): return False - # error: Argument 1 to "is_dates_only" has incompatible type - # "Union[ExtensionArray, ndarray]"; expected "Union[ndarray, - # DatetimeArray, Index, DatetimeIndex]" - - return self.tz is None and is_dates_only(self._values) # type: ignore[arg-type] + return self.tz is None and self._values._is_dates_only def __reduce__(self): d = {"data": self._data, "name": self.name} @@ -428,7 +421,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: def _formatter_func(self): from pandas.io.formats.format import get_format_datetime64 - formatter = get_format_datetime64(is_dates_only_=self._is_dates_only) + formatter = get_format_datetime64(is_dates_only=self._is_dates_only) return lambda x: f"'{formatter(x)}'" # -------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 374f86b21172c..b3da30eaa210b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -45,9 +45,7 @@ NaT, Timedelta, Timestamp, - get_unit_from_dtype, iNaT, - periods_per_day, ) from pandas._libs.tslibs.nattype import NaTType @@ -1749,31 +1747,6 @@ def format_percentiles( return [i + "%" for i in out] -def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool: - # return a boolean if we are only dates (and don't have a timezone) - if not isinstance(values, Index): - values = values.ravel() - - if not isinstance(values, (DatetimeArray, DatetimeIndex)): - values = DatetimeIndex(values) - - if values.tz is not None: - return False - - values_int = values.asi8 - consider_values = values_int != iNaT - # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type - # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" - reso = get_unit_from_dtype(values.dtype) # type: ignore[arg-type] - ppd = periods_per_day(reso) - - # TODO: can we reuse is_date_array_normalized? would need a skipna kwd - even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 - if even_days: - return True - return False - - def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: if x is NaT: return nat_rep @@ -1799,12 +1772,12 @@ def _format_datetime64_dateonly( def get_format_datetime64( - is_dates_only_: bool, nat_rep: str = "NaT", date_format: str | None = None + is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None ) -> Callable: """Return a formatter callable taking a datetime64 as input and providing a string as output""" - if is_dates_only_: + if is_dates_only: return lambda x: _format_datetime64_dateonly( x, nat_rep=nat_rep, date_format=date_format ) @@ -1815,7 +1788,7 @@ def get_format_datetime64( class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self) -> list[str]: """we by definition have a TZ""" - ido = is_dates_only(self.values) + ido = self.values._is_dates_only values = self.values.astype(object) formatter = self.formatter or get_format_datetime64( ido, date_format=self.date_format diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 53ee449c2dc0c..642ee6446e200 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3308,9 +3308,13 @@ def format_func(x): assert result == ["10:10", "12:12"] def test_datetime64formatter_tz_ms(self): - x = Series( - np.array(["2999-01-01", "2999-01-02", "NaT"], dtype="datetime64[ms]") - ).dt.tz_localize("US/Pacific") + x = ( + Series( + np.array(["2999-01-01", "2999-01-02", "NaT"], dtype="datetime64[ms]") + ) + .dt.tz_localize("US/Pacific") + ._values + ) result = fmt.Datetime64TZFormatter(x).get_result() assert result[0].strip() == "2999-01-01 00:00:00-08:00" assert result[1].strip() == "2999-01-02 00:00:00-08:00" From 9cf6477657ab4a139a972241d57ed59aa3ac7656 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 09:00:33 -0700 Subject: [PATCH 3/5] mypy fixup --- pandas/core/indexes/datetimes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c00cebd46719b..e061ea55148da 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -266,6 +266,7 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: return libindex.DatetimeEngine _data: DatetimeArray + _values: DatetimeArray tz: dt.tzinfo | None # -------------------------------------------------------------------- From 264562d1cec5b7bb2d324b79c17d2a9ba251733a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 11:28:42 -0700 Subject: [PATCH 4/5] suggested simplification --- pandas/core/arrays/datetimes.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1b64dd53b644d..fae42f170a6b6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -737,11 +737,9 @@ def astype(self, dtype, copy: bool = True): def _format_native_types( self, *, na_rep: str | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: - if date_format is None: - ido = self._is_dates_only - if ido: - # Only dates and no timezone: provide a default format - date_format = "%Y-%m-%d" + if date_format is None and self._is_dates_only: + # Only dates and no timezone: provide a default format + date_format = "%Y-%m-%d" return tslib.format_array_from_datetime( self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso From 519ae960100c59de2291edcb29bfa6c7d6130977 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 11:30:11 -0700 Subject: [PATCH 5/5] redundant check --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e061ea55148da..62cdce36ed5fb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -399,7 +399,7 @@ def _is_dates_only(self) -> bool: if delta and delta % dt.timedelta(days=1) != dt.timedelta(days=0): return False - return self.tz is None and self._values._is_dates_only + return self._values._is_dates_only def __reduce__(self): d = {"data": self._data, "name": self.name}