Skip to content

REF: formats.format functions that should be DTA methods #55394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@
get_resolution,
get_supported_reso,
get_unit_from_dtype,
iNaT,
ints_to_pydatetime,
is_date_array_normalized,
is_supported_unit,
is_unitless,
normalize_i8_timestamps,
npy_unit_to_abbrev,
periods_per_day,
timezones,
to_offset,
tz_convert_from_utc,
Expand Down Expand Up @@ -735,14 +737,35 @@ def astype(self, dtype, copy: bool = True):
def _format_native_types(
self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
) -> npt.NDArray[np.object_]:
from pandas.io.formats.format import get_format_datetime64_from_values

fmt = get_format_datetime64_from_values(self, date_format)
if date_format is None:
ido = self._is_dates_only
if ido:
# Only dates and no timezone: provide a default format
date_format = "%Y-%m-%d"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if date_format is None:
ido = self._is_dates_only
if ido:
# Only dates and no timezone: provide a default format
date_format = "%Y-%m-%d"
if date_format is None and self._is_dates_only:
# Only dates and no timezone: provide a default format
date_format = "%Y-%m-%d"

Nit

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#55405 implements this same method in the mixin class TimelikeOps. That is the better long-term location. Will update whichever PR goes in second.


return tslib.format_array_from_datetime(
self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._creso
self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso
)

@property
def _is_dates_only(self) -> bool:
    """
    Check whether all non-missing values fall on midnight with no timezone.

    Arrays for which this holds are given a more compact __repr__ than
    other cases.

    Returns
    -------
    bool
        False if a timezone is set or if any non-NaT value is not an exact
        multiple of one day; True otherwise.
    """
    if self.tz is not None:
        return False

    values_int = self.asi8
    dtype = cast(np.dtype, self.dtype)  # since we checked tz above
    ppd = periods_per_day(get_unit_from_dtype(dtype))

    # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
    # Entries equal to iNaT are masked out so they never count against us.
    has_time_of_day = np.logical_and(values_int != iNaT, values_int % ppd != 0)

    # ``not`` on the numpy scalar yields a built-in bool, matching the
    # annotated return type (the comparison itself would give np.bool_).
    return not has_time_of_day.any()

# -----------------------------------------------------------------
# Comparison Methods

Expand Down
12 changes: 3 additions & 9 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]:
return libindex.DatetimeEngine

_data: DatetimeArray
_values: DatetimeArray
tz: dt.tzinfo | None

# --------------------------------------------------------------------
Expand Down Expand Up @@ -393,19 +394,12 @@ def _is_dates_only(self) -> bool:
-------
bool
"""

from pandas.io.formats.format import is_dates_only

delta = getattr(self.freq, "delta", None)

if delta and delta % dt.timedelta(days=1) != dt.timedelta(days=0):
return False

# error: Argument 1 to "is_dates_only" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "Union[ndarray,
# DatetimeArray, Index, DatetimeIndex]"

return self.tz is None and is_dates_only(self._values) # type: ignore[arg-type]
return self.tz is None and self._values._is_dates_only

def __reduce__(self):
d = {"data": self._data, "name": self.name}
Expand All @@ -428,7 +422,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
def _formatter_func(self):
from pandas.io.formats.format import get_format_datetime64

formatter = get_format_datetime64(is_dates_only_=self._is_dates_only)
formatter = get_format_datetime64(is_dates_only=self._is_dates_only)
return lambda x: f"'{formatter(x)}'"

# --------------------------------------------------------------------
Expand Down
49 changes: 3 additions & 46 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@
NaT,
Timedelta,
Timestamp,
get_unit_from_dtype,
iNaT,
periods_per_day,
)
from pandas._libs.tslibs.nattype import NaTType

Expand Down Expand Up @@ -1749,31 +1747,6 @@ def format_percentiles(
return [i + "%" for i in out]


def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
    """
    Return True if ``values`` holds only dates (and has no timezone).

    Anything that is not an Index is flattened first, and anything that is
    not already a DatetimeArray/DatetimeIndex is wrapped in a DatetimeIndex.
    Entries equal to iNaT are ignored by the check.
    """
    if not isinstance(values, Index):
        values = values.ravel()

    if not isinstance(values, (DatetimeArray, DatetimeIndex)):
        values = DatetimeIndex(values)

    # timezone-aware data is never treated as dates-only
    if values.tz is not None:
        return False

    i8 = values.asi8
    # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
    ppd = periods_per_day(
        get_unit_from_dtype(values.dtype)  # type: ignore[arg-type]
    )

    # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
    off_midnight = (i8 != iNaT) & (i8 % ppd != 0)
    return not off_midnight.any()


def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:
if x is NaT:
return nat_rep
Expand All @@ -1799,39 +1772,23 @@ def _format_datetime64_dateonly(


def get_format_datetime64(
is_dates_only_: bool, nat_rep: str = "NaT", date_format: str | None = None
is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
"""Return a formatter callable taking a datetime64 as input and providing
a string as output"""

if is_dates_only_:
if is_dates_only:
return lambda x: _format_datetime64_dateonly(
x, nat_rep=nat_rep, date_format=date_format
)
else:
return lambda x: _format_datetime64(x, nat_rep=nat_rep)


def get_format_datetime64_from_values(
    values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
) -> str | None:
    """
    Pick the strftime format to use for ``values``.

    A truthy ``date_format`` is returned unchanged. When it is falsy and
    ``is_dates_only(values)`` holds, the default "%Y-%m-%d" is returned;
    otherwise ``date_format`` is handed back as given.
    """
    needs_flatten = isinstance(values, np.ndarray) and values.ndim > 1
    if needs_flatten:
        # We don't actually care about the order of values, and DatetimeIndex
        # only accepts 1D values
        values = values.ravel()

    # is_dates_only is evaluated first, unconditionally, as before
    if is_dates_only(values) and not date_format:
        # Only dates and no timezone: provide a default format
        return "%Y-%m-%d"
    return date_format


class Datetime64TZFormatter(Datetime64Formatter):
def _format_strings(self) -> list[str]:
"""we by definition have a TZ"""
ido = is_dates_only(self.values)
ido = self.values._is_dates_only
values = self.values.astype(object)
formatter = self.formatter or get_format_datetime64(
ido, date_format=self.date_format
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3308,9 +3308,13 @@ def format_func(x):
assert result == ["10:10", "12:12"]

def test_datetime64formatter_tz_ms(self):
x = Series(
np.array(["2999-01-01", "2999-01-02", "NaT"], dtype="datetime64[ms]")
).dt.tz_localize("US/Pacific")
x = (
Series(
np.array(["2999-01-01", "2999-01-02", "NaT"], dtype="datetime64[ms]")
)
.dt.tz_localize("US/Pacific")
._values
)
result = fmt.Datetime64TZFormatter(x).get_result()
assert result[0].strip() == "2999-01-01 00:00:00-08:00"
assert result[1].strip() == "2999-01-02 00:00:00-08:00"
Expand Down