diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 884db9ee931d4..e80f134290a7e 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -86,7 +86,10 @@ cdef class Factorizer: self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None ): """ + Examples + -------- Factorize values with nans replaced by na_sentinel + >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) array([ 0, 1, 20]) """ @@ -131,7 +134,10 @@ cdef class Int64Factorizer: def factorize(self, const int64_t[:] values, sort=False, na_sentinel=-1, na_value=None): """ + Examples + -------- Factorize values with nans replaced by na_sentinel + >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) array([ 0, 1, 20]) """ diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 8bbbc6db94842..437406cbbd819 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -105,8 +105,7 @@ cdef class BlockPlacement: Py_ssize_t start, stop, end, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) - self._as_array = np.arange(start, stop, step, - dtype=np.int64) + self._as_array = np.arange(start, stop, step, dtype=np.int64) self._has_array = True return self._as_array @@ -283,8 +282,7 @@ cdef slice_getitem(slice slc, ind): s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) if isinstance(ind, slice): - ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, - s_len) + ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) if ind_step > 0 and ind_len == s_len: # short-cut for no-op slice diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 1166768472449..55999f2d6fd74 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -481,8 +481,7 @@ cdef class Interval(IntervalMixin): @cython.wraparound(False) @cython.boundscheck(False) -def intervals_to_interval_bounds(ndarray intervals, - bint validate_closed=True): +def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): """ Parameters ---------- diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index f696591cf3bd1..cbe0e71153565 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -817,18 +817,22 @@ def asof_join_nearest_on_X_by_Y(asof_t[:] left_values, right_indexer = np.empty(left_size, dtype=np.int64) # search both forward and backward - bli, bri = asof_join_backward_on_X_by_Y(left_values, - right_values, - left_by_values, - right_by_values, - allow_exact_matches, - tolerance) - fli, fri = asof_join_forward_on_X_by_Y(left_values, - right_values, - left_by_values, - right_by_values, - allow_exact_matches, - tolerance) + bli, bri = asof_join_backward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + ) + fli, fri = asof_join_forward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + ) for i in range(len(bri)): # choose timestamp from right with smaller difference diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a176c4e41e834..b78b623bfa187 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,27 +1,44 @@ import cython -from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, - PyDateTime_IMPORT, - timedelta, datetime, date, time) +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + date, + datetime, + time, + timedelta, +) # 
import datetime C API PyDateTime_IMPORT cimport numpy as cnp -from numpy cimport int64_t, ndarray, float64_t +from numpy cimport float64_t, int64_t, ndarray import numpy as np cnp.import_array() import pytz from pandas._libs.util cimport ( - is_integer_object, is_float_object, is_datetime64_object) + is_datetime64_object, + is_float_object, + is_integer_object, +) from pandas._libs.tslibs.c_timestamp cimport _Timestamp from pandas._libs.tslibs.np_datetime cimport ( - check_dts_bounds, npy_datetimestruct, _string_to_dts, dt64_to_dtstruct, - dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, get_datetime64_value) + _string_to_dts, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + get_datetime64_value, + npy_datetimestruct, + pydate_to_dt64, + pydatetime_to_dt64, +) + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import parse_datetime_string @@ -44,45 +61,71 @@ from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_single, tz_convert_utc_to_tzlocal) + tz_convert_single, + tz_convert_utc_to_tzlocal, +) cdef inline object create_datetime_from_ts( - int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): - """ convenience routine to construct a datetime.datetime from its parts """ - return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz, fold=fold) + int64_t value, + npy_datetimestruct dts, + object tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.datetime from its parts. + """ + return datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold + ) cdef inline object create_date_from_ts( - int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): - """ convenience routine to construct a datetime.date from its parts """ + int64_t value, + npy_datetimestruct dts, + object tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.date from its parts. + """ # GH 25057 add fold argument to match other func_create signatures return date(dts.year, dts.month, dts.day) cdef inline object create_time_from_ts( - int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): - """ convenience routine to construct a datetime.time from its parts """ + int64_t value, + npy_datetimestruct dts, + object tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.time from its parts. + """ return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) @cython.wraparound(False) @cython.boundscheck(False) -def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, - bint fold=0, str box="datetime"): +def ints_to_pydatetime( + const int64_t[:] arr, + object tz=None, + object freq=None, + bint fold=0, + str box="datetime" +): """ - Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. 
Parameters ---------- - arr : array of i8 - tz : str, default None + arr : array of i8 + tz : str, optional convert to this timezone - freq : str/Offset, default None + freq : str/Offset, optional freq to convert fold : bint, default is 0 Due to daylight saving time, one wall clock time can occur twice @@ -91,17 +134,16 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, the wall clock hits the ambiguous time .. versionadded:: 1.1.0 - box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' - If datetime, convert to datetime.datetime - If date, convert to datetime.date - If time, convert to datetime.time - If Timestamp, convert to pandas.Timestamp + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' + * If datetime, convert to datetime.datetime + * If date, convert to datetime.date + * If time, convert to datetime.time + * If Timestamp, convert to pandas.Timestamp Returns ------- - result : array of dtype specified by box + ndarray of dtype specified by box """ - cdef: Py_ssize_t i, n = len(arr) ndarray[int64_t] trans @@ -224,8 +266,12 @@ def _test_parse_iso8601(ts: str): @cython.wraparound(False) @cython.boundscheck(False) -def format_array_from_datetime(ndarray[int64_t] values, object tz=None, - object format=None, object na_rep=None): +def format_array_from_datetime( + ndarray[int64_t] values, + object tz=None, + object format=None, + object na_rep=None +): """ return a np object array of the string formatted values @@ -303,8 +349,12 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, - str errors='coerce'): +def array_with_unit_to_datetime( + ndarray values, + ndarray mask, + object unit, + str errors='coerce' +): """ Convert the ndarray to datetime according to the time unit. @@ -322,14 +372,13 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, Parameters ---------- values : ndarray of object - Date-like objects to convert - mask : ndarray of bool - Not-a-time mask for non-nullable integer types conversion, - can be None + Date-like objects to convert. + mask : boolean ndarray + Not-a-time mask for non-nullable integer types conversion, can be None. unit : object - Time unit to use during conversion + Time unit to use during conversion. errors : str, default 'raise' - Error behavior when parsing + Error behavior when parsing. 
Returns ------- @@ -382,8 +431,7 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): - raise OutOfBoundsDatetime(f"cannot convert input with unit " - f"'{unit}'") + raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = NPY_NAT @@ -409,8 +457,8 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, except OverflowError: if is_raise: raise OutOfBoundsDatetime( - f"cannot convert input {val} with the unit " - f"'{unit}'") + f"cannot convert input {val} with the unit '{unit}'" + ) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -425,16 +473,16 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, except ValueError: if is_raise: raise ValueError( - f"non convertible value {val} with the unit " - f"'{unit}'") + f"non convertible value {val} with the unit '{unit}'" + ) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT except OverflowError: if is_raise: raise OutOfBoundsDatetime( - f"cannot convert input {val} with the unit " - f"'{unit}'") + f"cannot convert input {val} with the unit '{unit}'" + ) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -442,8 +490,9 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, else: if is_raise: - raise ValueError(f"unit='{unit}' not valid with non-numerical " - f"val='{val}'") + raise ValueError( + f"unit='{unit}' not valid with non-numerical val='{val}'" + ) if is_ignore: raise AssertionError @@ -486,9 +535,14 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, @cython.wraparound(False) @cython.boundscheck(False) -cpdef array_to_datetime(ndarray[object] values, str errors='raise', - bint dayfirst=False, bint yearfirst=False, - object utc=None, bint require_iso8601=False): +cpdef array_to_datetime( + ndarray[object] values, + str errors='raise', + bint dayfirst=False, + bint yearfirst=False, + object utc=None, + bint require_iso8601=False +): """ Converts a 1D array of date-like values to a numpy array of either: 1) datetime64[ns] data @@ -625,8 +679,9 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', iresult[i] = NPY_NAT continue elif is_raise: - raise ValueError(f"time data {val} doesn't " - f"match format specified") + raise ValueError( + f"time data {val} doesn't match format specified" + ) return values, tz_out try: @@ -641,8 +696,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError("invalid string coercion to " - "datetime") + raise TypeError("invalid string coercion to datetime") if tz is not None: seen_datetime_offset = 1 @@ -708,8 +762,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', return ignore_errors_out_of_bounds_fallback(values), tz_out except TypeError: - return array_to_datetime_object(values, errors, - dayfirst, yearfirst) + return array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime and seen_integer: # we have mixed datetimes & integers @@ -724,8 +777,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', elif is_raise: raise ValueError("mixed datetimes and integers in passed array") else: - return array_to_datetime_object(values, errors, - dayfirst, yearfirst) + return array_to_datetime_object(values, errors, dayfirst, yearfirst) if 
seen_datetime_offset and not utc_convert: # GH#17697 @@ -736,8 +788,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return array_to_datetime_object(values, errors, - dayfirst, yearfirst) + return array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() tz_out = pytz.FixedOffset(tz_offset / 60.) @@ -784,8 +835,12 @@ cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values): @cython.wraparound(False) @cython.boundscheck(False) -cdef array_to_datetime_object(ndarray[object] values, str errors, - bint dayfirst=False, bint yearfirst=False): +cdef array_to_datetime_object( + ndarray[object] values, + str errors, + bint dayfirst=False, + bint yearfirst=False +): """ Fall back function for array_to_datetime diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 8bee7da6231ba..50b7fba67e78f 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -38,7 +38,7 @@ def get_time_micros(const int64_t[:] dtindex): cdef: ndarray[int64_t] micros - micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64) + micros = np.mod(dtindex, DAY_SECONDS * 1_000_000_000, dtype=np.int64) micros //= 1000 return micros @@ -54,13 +54,15 @@ def build_field_sarray(const int64_t[:] dtindex): npy_datetimestruct dts ndarray[int32_t] years, months, days, hours, minutes, seconds, mus - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond + sa_dtype = [ + ("Y", "i4"), # year + ("M", "i4"), # month + ("D", "i4"), # day + ("h", "i4"), # hour + ("m", "i4"), # min + ("s", "i4"), # second + ("u", "i4"), # microsecond + ] out = np.empty(count, dtype=sa_dtype) @@ -157,9 +159,12 @@ def get_start_end_field(const int64_t[:] dtindex, object field, int mo_off, dom, doy, dow, ldom _month_offset = np.array( - [[0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], - [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]], - dtype=np.int32) + [ + [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], + [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366], + ], + dtype=np.int32, + ) out = np.zeros(count, dtype='int8') diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 68a25d0cc481a..7fec4ba5e7d25 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -764,7 +764,9 @@ NaT = c_NaT # Python-visible # ---------------------------------------------------------------------- cdef inline bint checknull_with_nat(object val): - """ utility to check if a value is a nat or not """ + """ + Utility to check if a value is a nat or not. + """ return val is None or util.is_nan(val) or val is c_NaT or val is C_NA diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5cd3467eed042..64b79200028b6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1090,11 +1090,10 @@ default 'raise' def normalize(self): """ - Normalize Timestamp to midnight, preserving - tz information. + Normalize Timestamp to midnight, preserving tz information. 
""" if self.tz is None or is_utc(self.tz): - DAY_NS = DAY_SECONDS * 1000000000 + DAY_NS = DAY_SECONDS * 1_000_000_000 normalized_value = self.value - (self.value % DAY_NS) return Timestamp(normalized_value).tz_localize(self.tz) normalized_value = normalize_i8_timestamps( @@ -1113,7 +1112,7 @@ cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution # use the smallest value with a 0 nanosecond unit (0s in last 3 digits) -cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 +cdef int64_t _NS_LOWER_BOUND = -9_223_372_036_854_775_000 # Resolution is in nanoseconds Timestamp.min = Timestamp(_NS_LOWER_BOUND)