From eefc8a44500c33797598c3a3fce7da21a94a14a0 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Mon, 10 Oct 2022 23:31:04 -0400 Subject: [PATCH 1/8] read_json `engine` argument integration - added JSONEngine to _typing.py - added engine to `read_json` inputs - added engine to `read_json` docstring - added engine logic to `JsonReader` - added basis of the _make_engine method --- pandas/_typing.py | 3 +++ pandas/io/json/_json.py | 42 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 5c22baa4bd42e..a4757630ab50e 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -319,6 +319,9 @@ def closed(self) -> bool: # read_csv engines CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] +# read_json engines +JSONEngine = Literal["ujson", "pyarrow"] + # read_xml parsers XMLParsers = Literal["lxml", "etree"] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 9b8364c449e36..a0491f327cd63 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -29,6 +29,7 @@ DtypeArg, FilePath, IndexLabel, + JSONEngine, JSONSerializable, ReadBuffer, StorageOptions, @@ -72,6 +73,8 @@ build_table_schema, parse_table_schema, ) +from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper +from pandas.io.parsers.base_parser import ParserBase from pandas.io.parsers.readers import validate_integer if TYPE_CHECKING: @@ -380,6 +383,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., + engine: JSONEngine | None = ..., lines: bool = ..., chunksize: int, compression: CompressionOptions = ..., @@ -404,6 +408,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., + engine: JSONEngine | None = ..., lines: bool = ..., chunksize: int, compression: CompressionOptions = ..., @@ -428,6 +433,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., + engine: JSONEngine | None = ..., lines: bool = ..., chunksize: None = ..., compression: CompressionOptions = ..., @@ -451,6 +457,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., + engine: JSONEngine | None = None, lines: bool = ..., chunksize: None = ..., compression: CompressionOptions = ..., @@ -479,6 +486,7 @@ def read_json( date_unit: str | None = None, encoding: str | None = None, encoding_errors: str | None = "strict", + engine: JSONEngine | None = None, lines: bool = False, chunksize: int | None = None, compression: CompressionOptions = "infer", @@ -607,6 +615,9 @@ def read_json( .. versionadded:: 1.3.0 + engine : {{'ujson', 'pyarrow'}} + Parser engine to use. + lines : bool, default False Read the file as a json object per line. @@ -743,6 +754,7 @@ def read_json( precise_float=precise_float, date_unit=date_unit, encoding=encoding, + engine=engine, lines=lines, chunksize=chunksize, compression=compression, @@ -780,6 +792,7 @@ def __init__( precise_float: bool, date_unit, encoding, + engine, lines: bool, chunksize: int | None, compression: CompressionOptions, @@ -798,6 +811,7 @@ def __init__( self.precise_float = precise_float self.date_unit = date_unit self.encoding = encoding + self.engine = engine self.compression = compression self.storage_options = storage_options self.lines = lines @@ -816,8 +830,32 @@ def __init__( if not self.lines: raise ValueError("nrows can only be passed if lines=True") - data = self._get_data_from_filepath(filepath_or_buffer) - self.data = self._preprocess_data(data) + if engine is not None: + self._engine = self._make_engine(filepath_or_buffer, self.engine) + else: + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _make_engine( + self, + filepath_or_buffer: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + engine: JSONEngine, + ) -> ParserBase: + + mapping: dict[str, type[ParserBase]] = { + "pyarrow": ArrowParserWrapper, + "ujson": ..., + } + + if engine not in mapping: + raise ValueError( + f"Unknown engine: {engine} (valid options are {mapping.keys()})" + ) + + if not isinstance(filepath_or_buffer, list): + ... + + return mapping[engine](filepath_or_buffer) def _preprocess_data(self, data): """ From 39409f27cd1192876e5b1eab9e14a5471bf6501a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 15 Oct 2022 03:54:56 -0700 Subject: [PATCH 2/8] REF: _reso->_creso (#49107) --- pandas/_libs/index.pyx | 4 +- pandas/_libs/tslibs/offsets.pyi | 2 +- pandas/_libs/tslibs/offsets.pyx | 16 +-- pandas/_libs/tslibs/period.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pxd | 2 +- pandas/_libs/tslibs/timedeltas.pyi | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 80 ++++++------ pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyi | 2 +- pandas/_libs/tslibs/timestamps.pyx | 78 ++++++------ pandas/_libs/tslibs/tzconversion.pxd | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 4 +- pandas/core/arrays/datetimelike.py | 12 +- pandas/core/arrays/datetimes.py | 40 +++--- pandas/core/arrays/timedeltas.py | 6 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/arrays/test_datetimes.py | 10 +- pandas/tests/arrays/test_timedeltas.py | 20 +-- pandas/tests/dtypes/test_dtypes.py | 2 +- .../scalar/timedelta/test_constructors.py | 24 ++-- .../tests/scalar/timedelta/test_timedelta.py | 68 +++++----- .../scalar/timestamp/test_constructors.py | 6 +- .../tests/scalar/timestamp/test_timestamp.py | 116 +++++++++--------- .../tests/scalar/timestamp/test_timezones.py | 6 +- .../tests/scalar/timestamp/test_unary_ops.py | 18 +-- pandas/tests/tools/test_to_datetime.py | 2 +- pandas/tests/tseries/offsets/test_offsets.py | 2 +- pandas/tseries/frequencies.py | 16 +-- 29 files changed, 274 insertions(+), 274 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 6aa93d9780913..cc0174f795ebe 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -502,7 +502,7 @@ cdef class DatetimeEngine(Int64Engine): if scalar is NaT: return NaT.value elif isinstance(scalar, _Timestamp): - if scalar._reso == self.reso: + if scalar._creso == self.reso: return scalar.value else: # Note: caller is responsible for catching potential ValueError @@ -570,7 +570,7 @@ cdef class TimedeltaEngine(DatetimeEngine): if scalar is NaT: return NaT.value elif isinstance(scalar, _Timedelta): - if scalar._reso == self.reso: + if scalar._creso == self.reso: return scalar.value else: # Note: caller is responsible for catching potential ValueError diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index 0390aad23d83a..9317a371cc344 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -109,7 +109,7 @@ def to_offset(freq: _BaseOffsetT) -> _BaseOffsetT: ... def to_offset(freq: timedelta | str) -> BaseOffset: ... class Tick(SingleConstructorOffset): - _reso: int + _creso: int _prefix: str _td64_unit: str def __init__(self, n: int = ..., normalize: bool = ...) -> None: ... diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 68577113702eb..8bdd3d6ac259e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1146,7 +1146,7 @@ cdef class Day(Tick): _prefix = "D" _td64_unit = "D" _period_dtype_code = PeriodDtypeCode.D - _reso = NPY_DATETIMEUNIT.NPY_FR_D + _creso = NPY_DATETIMEUNIT.NPY_FR_D cdef class Hour(Tick): @@ -1154,7 +1154,7 @@ cdef class Hour(Tick): _prefix = "H" _td64_unit = "h" _period_dtype_code = PeriodDtypeCode.H - _reso = NPY_DATETIMEUNIT.NPY_FR_h + _creso = NPY_DATETIMEUNIT.NPY_FR_h cdef class Minute(Tick): @@ -1162,7 +1162,7 @@ cdef class Minute(Tick): _prefix = "T" _td64_unit = "m" _period_dtype_code = PeriodDtypeCode.T - _reso = NPY_DATETIMEUNIT.NPY_FR_m + _creso = NPY_DATETIMEUNIT.NPY_FR_m cdef class Second(Tick): @@ -1170,7 +1170,7 @@ cdef class Second(Tick): _prefix = "S" _td64_unit = "s" _period_dtype_code = PeriodDtypeCode.S - _reso = NPY_DATETIMEUNIT.NPY_FR_s + _creso = NPY_DATETIMEUNIT.NPY_FR_s cdef class Milli(Tick): @@ -1178,7 +1178,7 @@ cdef class Milli(Tick): _prefix = "L" _td64_unit = "ms" _period_dtype_code = PeriodDtypeCode.L - _reso = NPY_DATETIMEUNIT.NPY_FR_ms + _creso = NPY_DATETIMEUNIT.NPY_FR_ms cdef class Micro(Tick): @@ -1186,7 +1186,7 @@ cdef class Micro(Tick): _prefix = "U" _td64_unit = "us" _period_dtype_code = PeriodDtypeCode.U - _reso = NPY_DATETIMEUNIT.NPY_FR_us + _creso = NPY_DATETIMEUNIT.NPY_FR_us cdef class Nano(Tick): @@ -1194,7 +1194,7 @@ cdef class Nano(Tick): _prefix = "N" _td64_unit = "ns" _period_dtype_code = PeriodDtypeCode.N - _reso = NPY_DATETIMEUNIT.NPY_FR_ns + _creso = NPY_DATETIMEUNIT.NPY_FR_ns def delta_to_tick(delta: timedelta) -> Tick: @@ -3394,7 +3394,7 @@ cdef class FY5253Quarter(FY5253Mixin): for qlen in qtr_lens: if qlen * 7 <= tdelta.days: num_qtrs += 1 - tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso(norm._reso) + tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso(norm._creso) else: break else: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 55a3741b9cdff..774fd7f20fed6 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1693,7 +1693,7 @@ cdef class _Period(PeriodMixin): return NaT try: - inc = delta_to_nanoseconds(other, reso=self.freq._reso, round_ok=False) + inc = delta_to_nanoseconds(other, reso=self.freq._creso, round_ok=False) except ValueError as err: raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") from err diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index feec08840cb98..921b30b4f91dc 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -18,7 +18,7 @@ cdef class _Timedelta(timedelta): int64_t value # nanoseconds bint _is_populated # are my components populated int64_t _d, _h, _m, _s, _ms, _us, _ns - NPY_DATETIMEUNIT _reso + NPY_DATETIMEUNIT _creso cpdef timedelta to_pytimedelta(_Timedelta self) cdef bint _has_ns(self) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 8babcba747b0c..b40b08eb601a4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -78,7 +78,7 @@ def delta_to_nanoseconds( ) -> int: ... class Timedelta(timedelta): - _reso: int + _creso: int min: ClassVar[Timedelta] max: ClassVar[Timedelta] resolution: ClassVar[Timedelta] diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2beb3470318b5..3aaf321f301cb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -240,11 +240,11 @@ cpdef int64_t delta_to_nanoseconds( if is_tick_object(delta): n = delta.n - in_reso = delta._reso + in_reso = delta._creso elif isinstance(delta, _Timedelta): n = delta.value - in_reso = delta._reso + in_reso = delta._creso elif is_timedelta64_object(delta): in_reso = get_datetime64_unit(delta) @@ -339,7 +339,7 @@ cdef convert_to_timedelta64(object ts, str unit): return np.timedelta64(NPY_NAT, "ns") elif isinstance(ts, _Timedelta): # already in the proper format - if ts._reso != NPY_FR_ns: + if ts._creso != NPY_FR_ns: ts = ts._as_unit("ns").asm8 else: ts = np.timedelta64(ts.value, "ns") @@ -740,7 +740,7 @@ cdef bint _validate_ops_compat(other): def _op_unary_method(func, name): def f(self): new_value = func(self.value) - return _timedelta_from_value_and_reso(new_value, self._reso) + return _timedelta_from_value_and_reso(new_value, self._creso) f.__name__ = name return f @@ -792,10 +792,10 @@ def _binary_op_method_timedeltalike(op, name): # Matching numpy, we cast to the higher resolution. Unlike numpy, # we raise instead of silently overflowing during this casting. - if self._reso < other._reso: - self = (<_Timedelta>self)._as_creso(other._reso, round_ok=True) - elif self._reso > other._reso: - other = (<_Timedelta>other)._as_creso(self._reso, round_ok=True) + if self._creso < other._creso: + self = (<_Timedelta>self)._as_creso(other._creso, round_ok=True) + elif self._creso > other._creso: + other = (<_Timedelta>other)._as_creso(self._creso, round_ok=True) res = op(self.value, other.value) if res == NPY_NAT: @@ -803,7 +803,7 @@ def _binary_op_method_timedeltalike(op, name): # TODO: more generally could do an overflowcheck in op? return NaT - return _timedelta_from_value_and_reso(res, reso=self._reso) + return _timedelta_from_value_and_reso(res, reso=self._creso) f.__name__ = name return f @@ -970,7 +970,7 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): td_base.value = value td_base._is_populated = 0 - td_base._reso = reso + td_base._creso = reso return td_base @@ -996,7 +996,7 @@ class MinMaxReso: # i.e. this is on the class, default to nanos return Timedelta(val) else: - return Timedelta._from_value_and_reso(val, obj._reso) + return Timedelta._from_value_and_reso(val, obj._creso) def __set__(self, obj, value): raise AttributeError(f"{self._name} is not settable.") @@ -1022,9 +1022,9 @@ cdef class _Timedelta(timedelta): @property def _unit(self) -> str: """ - The abbreviation associated with self._reso. + The abbreviation associated with self._creso. """ - return npy_unit_to_abbrev(self._reso) + return npy_unit_to_abbrev(self._creso) @property def days(self) -> int: # TODO(cython3): make cdef property @@ -1127,7 +1127,7 @@ cdef class _Timedelta(timedelta): else: return NotImplemented - if self._reso == ots._reso: + if self._creso == ots._creso: return cmp_scalar(self.value, ots.value, op) return self._compare_mismatched_resos(ots, op) @@ -1139,18 +1139,18 @@ cdef class _Timedelta(timedelta): npy_datetimestruct dts_other # dispatch to the datetimestruct utils instead of writing new ones! - pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self) - pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other) + pandas_datetime_to_datetimestruct(self.value, self._creso, &dts_self) + pandas_datetime_to_datetimestruct(other.value, other._creso, &dts_other) return cmp_dtstructs(&dts_self, &dts_other, op) cdef bint _has_ns(self): - if self._reso == NPY_FR_ns: + if self._creso == NPY_FR_ns: return self.value % 1000 != 0 - elif self._reso < NPY_FR_ns: + elif self._creso < NPY_FR_ns: # i.e. seconds, millisecond, microsecond return False else: - raise NotImplementedError(self._reso) + raise NotImplementedError(self._creso) cdef _ensure_components(_Timedelta self): """ @@ -1162,7 +1162,7 @@ cdef class _Timedelta(timedelta): cdef: pandas_timedeltastruct tds - pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds) + pandas_timedelta_to_timedeltastruct(self.value, self._creso, &tds) self._d = tds.days self._h = tds.hrs self._m = tds.min @@ -1194,7 +1194,7 @@ cdef class _Timedelta(timedelta): ----- Any nanosecond resolution will be lost. """ - if self._reso == NPY_FR_ns: + if self._creso == NPY_FR_ns: return timedelta(microseconds=int(self.value) / 1000) # TODO(@WillAyd): is this the right way to use components? @@ -1208,7 +1208,7 @@ cdef class _Timedelta(timedelta): Return a numpy.timedelta64 object with 'ns' precision. """ cdef: - str abbrev = npy_unit_to_abbrev(self._reso) + str abbrev = npy_unit_to_abbrev(self._creso) # TODO: way to create a np.timedelta64 obj with the reso directly # instead of having to get the abbrev? return np.timedelta64(self.value, abbrev) @@ -1548,11 +1548,11 @@ cdef class _Timedelta(timedelta): cdef: int64_t value - if reso == self._reso: + if reso == self._creso: return self try: - value = convert_reso(self.value, self._reso, reso, round_ok=round_ok) + value = convert_reso(self.value, self._creso, reso, round_ok=round_ok) except OverflowError as err: unit = npy_unit_to_abbrev(reso) raise OutOfBoundsTimedelta( @@ -1565,10 +1565,10 @@ cdef class _Timedelta(timedelta): """ If _resos do not match, cast to the higher resolution, raising on overflow. """ - if self._reso > other._reso: - other = other._as_creso(self._reso) - elif self._reso < other._reso: - self = self._as_creso(other._reso) + if self._creso > other._creso: + other = other._as_creso(self._creso) + elif self._creso < other._creso: + self = self._as_creso(other._creso) return self, other @@ -1736,7 +1736,7 @@ class Timedelta(_Timedelta): return cls._from_value_and_reso(new_value, reso=new_reso) elif is_tick_object(value): - new_reso = get_supported_reso(value._reso) + new_reso = get_supported_reso(value._creso) new_value = delta_to_nanoseconds(value, reso=new_reso) return cls._from_value_and_reso(new_value, reso=new_reso) @@ -1769,10 +1769,10 @@ class Timedelta(_Timedelta): else: value, reso = state self.value = value - self._reso = reso + self._creso = reso def __reduce__(self): - object_state = self.value, self._reso + object_state = self.value, self._creso return (_timedelta_unpickle, object_state) @cython.cdivision(True) @@ -1784,11 +1784,11 @@ class Timedelta(_Timedelta): from pandas._libs.tslibs.offsets import to_offset to_offset(freq).nanos # raises on non-fixed freq - unit = delta_to_nanoseconds(to_offset(freq), self._reso) + unit = delta_to_nanoseconds(to_offset(freq), self._creso) arr = np.array([self.value], dtype="i8") result = round_nsint64(arr, mode, unit)[0] - return Timedelta._from_value_and_reso(result, self._reso) + return Timedelta._from_value_and_reso(result, self._creso) def round(self, freq): """ @@ -1852,7 +1852,7 @@ class Timedelta(_Timedelta): return _timedelta_from_value_and_reso( (other * self.value), - reso=self._reso, + reso=self._creso, ) elif is_array(other): @@ -1875,7 +1875,7 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: return np.nan - if other._reso != self._reso: + if other._creso != self._creso: self, other = self._maybe_cast_to_matching_resos(other) return self.value / float(other.value) @@ -1884,7 +1884,7 @@ class Timedelta(_Timedelta): if util.is_nan(other): return NaT return Timedelta._from_value_and_reso( - (self.value / other), self._reso + (self.value / other), self._creso ) elif is_array(other): @@ -1902,7 +1902,7 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: return np.nan - if self._reso != other._reso: + if self._creso != other._creso: self, other = self._maybe_cast_to_matching_resos(other) return float(other.value) / self.value @@ -1930,14 +1930,14 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: return np.nan - if self._reso != other._reso: + if self._creso != other._creso: self, other = self._maybe_cast_to_matching_resos(other) return self.value // other.value elif is_integer_object(other) or is_float_object(other): if util.is_nan(other): return NaT - return type(self)._from_value_and_reso(self.value // other, self._reso) + return type(self)._from_value_and_reso(self.value // other, self._creso) elif is_array(other): if other.ndim == 0: @@ -1975,7 +1975,7 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: return np.nan - if self._reso != other._reso: + if self._creso != other._creso: self, other = self._maybe_cast_to_matching_resos(other) return other.value // self.value diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 09aa682fd57a5..397df11144d60 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -24,7 +24,7 @@ cdef class _Timestamp(ABCTimestamp): cdef readonly: int64_t value, nanosecond, year BaseOffset _freq - NPY_DATETIMEUNIT _reso + NPY_DATETIMEUNIT _creso cdef bint _get_start_end_field(self, str field, freq) cdef _get_date_name_field(self, str field, object locale) diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index 35cca3c905606..e916d7eb12dbf 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -27,7 +27,7 @@ _DatetimeT = TypeVar("_DatetimeT", bound=datetime) def integer_op_not_supported(obj: object) -> TypeError: ... class Timestamp(datetime): - _reso: int + _creso: int min: ClassVar[Timestamp] max: ClassVar[Timestamp] diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2bcdaadb15771..30ead1d4e3142 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -163,7 +163,7 @@ cdef inline _Timestamp create_timestamp_from_ts( ts_base._freq = freq ts_base.year = dts.year ts_base.nanosecond = dts.ps // 1000 - ts_base._reso = reso + ts_base._creso = reso return ts_base @@ -218,9 +218,9 @@ class MinMaxReso: # i.e. this is on the class, default to nanos return cls(val) elif self._name == "resolution": - return Timedelta._from_value_and_reso(val, obj._reso) + return Timedelta._from_value_and_reso(val, obj._creso) else: - return Timestamp._from_value_and_reso(val, obj._reso, tz=None) + return Timestamp._from_value_and_reso(val, obj._creso, tz=None) def __set__(self, obj, value): raise AttributeError(f"{self._name} is not settable.") @@ -257,9 +257,9 @@ cdef class _Timestamp(ABCTimestamp): @property def _unit(self) -> str: """ - The abbreviation associated with self._reso. + The abbreviation associated with self._creso. """ - return npy_unit_to_abbrev(self._reso) + return npy_unit_to_abbrev(self._creso) # ----------------------------------------------------------------- # Constructors @@ -382,7 +382,7 @@ cdef class _Timestamp(ABCTimestamp): raise TypeError( "Cannot compare tz-naive and tz-aware timestamps" ) - if self._reso == ots._reso: + if self._creso == ots._creso: return cmp_scalar(self.value, ots.value, op) return self._compare_mismatched_resos(ots, op) @@ -394,8 +394,8 @@ cdef class _Timestamp(ABCTimestamp): npy_datetimestruct dts_other # dispatch to the datetimestruct utils instead of writing new ones! - pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self) - pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other) + pandas_datetime_to_datetimestruct(self.value, self._creso, &dts_self) + pandas_datetime_to_datetimestruct(other.value, other._creso, &dts_other) return cmp_dtstructs(&dts_self, &dts_other, op) cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, @@ -435,17 +435,17 @@ cdef class _Timestamp(ABCTimestamp): # TODO: share this with __sub__, Timedelta.__add__ # Matching numpy, we cast to the higher resolution. Unlike numpy, # we raise instead of silently overflowing during this casting. - if self._reso < other._reso: - self = (<_Timestamp>self)._as_creso(other._reso, round_ok=True) - elif self._reso > other._reso: - other = (<_Timedelta>other)._as_creso(self._reso, round_ok=True) + if self._creso < other._creso: + self = (<_Timestamp>self)._as_creso(other._creso, round_ok=True) + elif self._creso > other._creso: + other = (<_Timedelta>other)._as_creso(self._creso, round_ok=True) nanos = other.value try: new_value = self.value + nanos result = type(self)._from_value_and_reso( - new_value, reso=self._reso, tz=self.tzinfo + new_value, reso=self._creso, tz=self.tzinfo ) except OverflowError as err: # TODO: don't hard-code nanosecond here @@ -524,16 +524,16 @@ cdef class _Timestamp(ABCTimestamp): # Matching numpy, we cast to the higher resolution. Unlike numpy, # we raise instead of silently overflowing during this casting. - if self._reso < other._reso: - self = (<_Timestamp>self)._as_creso(other._reso, round_ok=False) - elif self._reso > other._reso: - other = (<_Timestamp>other)._as_creso(self._reso, round_ok=False) + if self._creso < other._creso: + self = (<_Timestamp>self)._as_creso(other._creso, round_ok=False) + elif self._creso > other._creso: + other = (<_Timestamp>other)._as_creso(self._creso, round_ok=False) # scalar Timestamp/datetime - Timestamp/datetime -> yields a # Timedelta try: res_value = self.value - other.value - return Timedelta._from_value_and_reso(res_value, self._reso) + return Timedelta._from_value_and_reso(res_value, self._creso) except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: if isinstance(other, _Timestamp): if both_timestamps: @@ -576,7 +576,7 @@ cdef class _Timestamp(ABCTimestamp): if own_tz is not None and not is_utc(own_tz): pydatetime_to_dtstruct(self, &dts) - val = npy_datetimestruct_to_datetime(self._reso, &dts) + self.nanosecond + val = npy_datetimestruct_to_datetime(self._creso, &dts) + self.nanosecond else: val = self.value return val @@ -600,7 +600,7 @@ cdef class _Timestamp(ABCTimestamp): val = self._maybe_convert_value_to_local() out = get_start_end_field(np.array([val], dtype=np.int64), - field, freqstr, month_kw, self._reso) + field, freqstr, month_kw, self._creso) return out[0] cdef _warn_on_field_deprecation(self, freq, str field): @@ -763,7 +763,7 @@ cdef class _Timestamp(ABCTimestamp): val = self._maybe_convert_value_to_local() out = get_date_name_field(np.array([val], dtype=np.int64), - field, locale=locale, reso=self._reso) + field, locale=locale, reso=self._creso) return out[0] def day_name(self, locale=None) -> str: @@ -912,11 +912,11 @@ cdef class _Timestamp(ABCTimestamp): cdef: local_val = self._maybe_convert_value_to_local() int64_t normalized - int64_t ppd = periods_per_day(self._reso) + int64_t ppd = periods_per_day(self._creso) _Timestamp ts normalized = normalize_i8_stamp(local_val, ppd) - ts = type(self)._from_value_and_reso(normalized, reso=self._reso, tz=None) + ts = type(self)._from_value_and_reso(normalized, reso=self._creso, tz=None) return ts.tz_localize(self.tzinfo) # ----------------------------------------------------------------- @@ -939,10 +939,10 @@ cdef class _Timestamp(ABCTimestamp): reso = NPY_FR_ns else: reso = state[4] - self._reso = reso + self._creso = reso def __reduce__(self): - object_state = self.value, self._freq, self.tzinfo, self._reso + object_state = self.value, self._freq, self.tzinfo, self._creso return (_unpickle_timestamp, object_state) # ----------------------------------------------------------------- @@ -1066,10 +1066,10 @@ cdef class _Timestamp(ABCTimestamp): cdef: int64_t value - if reso == self._reso: + if reso == self._creso: return self - value = convert_reso(self.value, self._reso, reso, round_ok=round_ok) + value = convert_reso(self.value, self._creso, reso, round_ok=round_ok) return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo) def _as_unit(self, str unit, bint round_ok=True): @@ -1108,7 +1108,7 @@ cdef class _Timestamp(ABCTimestamp): # GH 17329 # Note: Naive timestamps will not match datetime.stdlib - denom = periods_per_second(self._reso) + denom = periods_per_second(self._creso) return round(self.value / denom, 6) @@ -1142,7 +1142,7 @@ cdef class _Timestamp(ABCTimestamp): Return a numpy.datetime64 object with 'ns' precision. """ # TODO: find a way to construct dt64 directly from _reso - abbrev = npy_unit_to_abbrev(self._reso) + abbrev = npy_unit_to_abbrev(self._creso) return np.datetime64(self.value, abbrev) def to_numpy(self, dtype=None, copy=False) -> np.datetime64: @@ -1682,7 +1682,7 @@ class Timestamp(_Timestamp): int64_t nanos to_offset(freq).nanos # raises on non-fixed freq - nanos = delta_to_nanoseconds(to_offset(freq), self._reso) + nanos = delta_to_nanoseconds(to_offset(freq), self._creso) if self.tz is not None: value = self.tz_localize(None).value @@ -1693,7 +1693,7 @@ class Timestamp(_Timestamp): # Will only ever contain 1 element for timestamp r = round_nsint64(value, mode, nanos)[0] - result = Timestamp._from_value_and_reso(r, self._reso, None) + result = Timestamp._from_value_and_reso(r, self._creso, None) if self.tz is not None: result = result.tz_localize( self.tz, ambiguous=ambiguous, nonexistent=nonexistent @@ -2099,17 +2099,17 @@ default 'raise' value = tz_localize_to_utc_single(self.value, tz, ambiguous=ambiguous, nonexistent=nonexistent, - reso=self._reso) + reso=self._creso) elif tz is None: # reset tz - value = tz_convert_from_utc_single(self.value, self.tz, reso=self._reso) + value = tz_convert_from_utc_single(self.value, self.tz, reso=self._creso) else: raise TypeError( "Cannot localize tz-aware Timestamp, use tz_convert for conversions" ) - out = type(self)._from_value_and_reso(value, self._reso, tz=tz) + out = type(self)._from_value_and_reso(value, self._creso, tz=tz) if out is not NaT: out._set_freq(self._freq) # avoid warning in constructor return out @@ -2164,7 +2164,7 @@ default 'raise' else: # Same UTC timestamp, different time zone tz = maybe_get_tz(tz) - out = type(self)._from_value_and_reso(self.value, reso=self._reso, tz=tz) + out = type(self)._from_value_and_reso(self.value, reso=self._creso, tz=tz) if out is not NaT: out._set_freq(self._freq) # avoid warning in constructor return out @@ -2245,10 +2245,10 @@ default 'raise' fold = self.fold if tzobj is not None: - value = tz_convert_from_utc_single(value, tzobj, reso=self._reso) + value = tz_convert_from_utc_single(value, tzobj, reso=self._creso) # setup components - pandas_datetime_to_datetimestruct(value, self._reso, &dts) + pandas_datetime_to_datetimestruct(value, self._creso, &dts) dts.ps = self.nanosecond * 1000 # replace @@ -2296,10 +2296,10 @@ default 'raise' ts_input = datetime(**kwargs) ts = convert_datetime_to_tsobject( - ts_input, tzobj, nanos=dts.ps // 1000, reso=self._reso + ts_input, tzobj, nanos=dts.ps // 1000, reso=self._creso ) return create_timestamp_from_ts( - ts.value, dts, tzobj, self._freq, fold, reso=self._reso + ts.value, dts, tzobj, self._freq, fold, reso=self._creso ) def to_julian_date(self) -> np.float64: diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 13735fb5945a4..3a6a6f4e10035 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -23,7 +23,7 @@ cdef int64_t tz_localize_to_utc_single( cdef class Localizer: cdef: tzinfo tz - NPY_DATETIMEUNIT _reso + NPY_DATETIMEUNIT _creso bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz ndarray trans Py_ssize_t ntrans diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 28ebce9724da9..953ba10993973 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -66,7 +66,7 @@ cdef class Localizer: @cython.boundscheck(False) def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso): self.tz = tz - self._reso = reso + self._creso = reso self.use_utc = self.use_tzlocal = self.use_fixed = False self.use_dst = self.use_pytz = False self.ntrans = -1 # placeholder @@ -121,7 +121,7 @@ cdef class Localizer: return utc_val elif self.use_tzlocal: return utc_val + _tz_localize_using_tzinfo_api( - utc_val, self.tz, to_utc=False, reso=self._reso, fold=fold + utc_val, self.tz, to_utc=False, reso=self._creso, fold=fold ) elif self.use_fixed: return utc_val + self.delta diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ebd440d4b0d24..bcf4b5d58bf74 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -440,7 +440,7 @@ def astype(self, dtype, copy: bool = True): tz=self.tz, freq=self.freq, box="timestamp", - reso=self._reso, + reso=self._creso, ) return converted @@ -1079,7 +1079,7 @@ def _cmp_method(self, other, op): if not is_period_dtype(self.dtype): self = cast(TimelikeOps, self) - if self._reso != other._reso: + if self._creso != other._creso: if not isinstance(other, type(self)): # i.e. Timedelta/Timestamp, cast to ndarray and let # compare_mismatched_resolutions handle broadcasting @@ -2039,7 +2039,7 @@ def _validate_dtype(cls, values, dtype): # -------------------------------------------------------------- @cache_readonly - def _reso(self) -> int: + def _creso(self) -> int: return get_unit_from_dtype(self._ndarray.dtype) @cache_readonly @@ -2068,9 +2068,9 @@ def _as_unit(self: TimelikeOpsT, unit: str) -> TimelikeOpsT: # TODO: annotate other as DatetimeArray | TimedeltaArray | Timestamp | Timedelta # with the return type matching input type. TypeVar? def _ensure_matching_resos(self, other): - if self._reso != other._reso: + if self._creso != other._creso: # Just as with Timestamp/Timedelta, we cast to the higher resolution - if self._reso < other._reso: + if self._creso < other._creso: self = self._as_unit(other._unit) else: other = other._as_unit(self._unit) @@ -2103,7 +2103,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): values = self.view("i8") values = cast(np.ndarray, values) nanos = to_offset(freq).nanos # raises on non-fixed frequencies - nanos = delta_to_nanoseconds(to_offset(freq), self._reso) + nanos = delta_to_nanoseconds(to_offset(freq), self._creso) result_i8 = round_nsint64(values, mode, nanos) result = self._maybe_mask_results(result_i8, fill_value=iNaT) result = result.view(self._ndarray.dtype) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fb1a5070c6c0d..9ecd9473c903b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -131,20 +131,20 @@ def f(self): month_kw = kwds.get("startingMonth", kwds.get("month", 12)) result = fields.get_start_end_field( - values, field, self.freqstr, month_kw, reso=self._reso + values, field, self.freqstr, month_kw, reso=self._creso ) else: - result = fields.get_date_field(values, field, reso=self._reso) + result = fields.get_date_field(values, field, reso=self._creso) # these return a boolean by-definition return result if field in self._object_ops: - result = fields.get_date_name_field(values, field, reso=self._reso) + result = fields.get_date_name_field(values, field, reso=self._creso) result = self._maybe_mask_results(result, fill_value=None) else: - result = fields.get_date_field(values, field, reso=self._reso) + result = fields.get_date_field(values, field, reso=self._creso) result = self._maybe_mask_results( result, fill_value=None, convert="float64" ) @@ -283,7 +283,7 @@ def _simple_new( # type: ignore[override] else: # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC], # then values.dtype should be M8[us]. - assert dtype._reso == get_unit_from_dtype(values.dtype) + assert dtype._creso == get_unit_from_dtype(values.dtype) result = super()._simple_new(values, dtype) result._freq = freq @@ -510,7 +510,7 @@ def _check_compatible_with(self, other, setitem: bool = False): def _box_func(self, x: np.datetime64) -> Timestamp | NaTType: # GH#42228 value = x.view("i8") - ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz) + ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz) # Non-overlapping identity check (left operand type: "Timestamp", # right operand type: "NaTType") if ts is not NaT: # type: ignore[comparison-overlap] @@ -577,11 +577,11 @@ def is_normalized(self) -> bool: """ Returns True if all of the dates are at midnight ("no time") """ - return is_date_array_normalized(self.asi8, self.tz, reso=self._reso) + return is_date_array_normalized(self.asi8, self.tz, reso=self._creso) @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Resolution: - return get_resolution(self.asi8, self.tz, reso=self._reso) + return get_resolution(self.asi8, self.tz, reso=self._creso) # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods @@ -619,7 +619,7 @@ def __iter__(self) -> Iterator: tz=self.tz, freq=self.freq, box="timestamp", - reso=self._reso, + reso=self._creso, ) yield from converted @@ -687,7 +687,7 @@ def _format_native_types( fmt = get_format_datetime64_from_values(self, date_format) return tslib.format_array_from_datetime( - self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._reso + self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._creso ) # ----------------------------------------------------------------- @@ -773,7 +773,7 @@ def _local_timestamps(self) -> npt.NDArray[np.int64]: if self.tz is None or timezones.is_utc(self.tz): # Avoid the copy that would be made in tzconversion return self.asi8 - return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso) + return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso) def tz_convert(self, tz) -> DatetimeArray: """ @@ -1021,7 +1021,7 @@ def tz_localize( tz, ambiguous=ambiguous, nonexistent=nonexistent, - reso=self._reso, + reso=self._creso, ) new_dates = new_dates.view(f"M8[{self._unit}]") dtype = tz_to_dtype(tz, unit=self._unit) @@ -1047,7 +1047,7 @@ def to_pydatetime(self) -> npt.NDArray[np.object_]: ------- datetimes : ndarray[object] """ - return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._reso) + return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso) def normalize(self) -> DatetimeArray: """ @@ -1087,7 +1087,7 @@ def normalize(self) -> DatetimeArray: '2014-08-01 00:00:00+05:30'], dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ - new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._reso) + new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso) dt64_values = new_values.view(self._ndarray.dtype) dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype) @@ -1242,7 +1242,7 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: values = self._local_timestamps() result = fields.get_date_name_field( - values, "month_name", locale=locale, reso=self._reso + values, "month_name", locale=locale, reso=self._creso ) result = self._maybe_mask_results(result, fill_value=None) return result @@ -1286,7 +1286,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: values = self._local_timestamps() result = fields.get_date_name_field( - values, "day_name", locale=locale, reso=self._reso + values, "day_name", locale=locale, reso=self._creso ) result = self._maybe_mask_results(result, fill_value=None) return result @@ -1303,7 +1303,7 @@ def time(self) -> npt.NDArray[np.object_]: # keeping their timezone and not using UTC timestamps = self._local_timestamps() - return ints_to_pydatetime(timestamps, box="time", reso=self._reso) + return ints_to_pydatetime(timestamps, box="time", reso=self._creso) @property def timetz(self) -> npt.NDArray[np.object_]: @@ -1312,7 +1312,7 @@ def timetz(self) -> npt.NDArray[np.object_]: The time part of the Timestamps. """ - return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._reso) + return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso) @property def date(self) -> npt.NDArray[np.object_]: @@ -1327,7 +1327,7 @@ def date(self) -> npt.NDArray[np.object_]: # keeping their timezone and not using UTC timestamps = self._local_timestamps() - return ints_to_pydatetime(timestamps, box="date", reso=self._reso) + return ints_to_pydatetime(timestamps, box="date", reso=self._creso) def isocalendar(self) -> DataFrame: """ @@ -1366,7 +1366,7 @@ def isocalendar(self) -> DataFrame: from pandas import DataFrame values = self._local_timestamps() - sarray = fields.build_isocalendar_sarray(values, reso=self._reso) + sarray = fields.build_isocalendar_sarray(values, reso=self._creso) iso_calendar_df = DataFrame( sarray, columns=["year", "week", "day"], dtype="UInt32" ) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 74cc9e50f97bb..92b9222cfc9bc 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -73,7 +73,7 @@ def _field_accessor(name: str, alias: str, docstring: str): def f(self) -> np.ndarray: values = self.asi8 - result = get_timedelta_field(values, alias, reso=self._reso) + result = get_timedelta_field(values, alias, reso=self._creso) if self._hasna: result = self._maybe_mask_results( result, fill_value=None, convert="float64" @@ -149,7 +149,7 @@ def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType: y = x.view("i8") if y == NaT.value: return NaT - return Timedelta._from_value_and_reso(y, reso=self._reso) + return Timedelta._from_value_and_reso(y, reso=self._creso) @property # error: Return type "dtype" of "dtype" incompatible with return type @@ -795,7 +795,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: Float64Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64') """ - pps = periods_per_second(self._reso) + pps = periods_per_second(self._creso) return self._maybe_mask_results(self.asi8 / pps, fill_value=None) def to_pytimedelta(self) -> npt.NDArray[np.object_]: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 03025ce342a57..cd1753bc8fec1 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -713,7 +713,7 @@ def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None: self._tz = tz @cache_readonly - def _reso(self) -> int: + def _creso(self) -> int: """ The NPY_DATETIMEUNIT corresponding to this dtype's resolution. """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index bb9d9f69ed38c..247126227c587 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -500,7 +500,7 @@ def _get_time_micros(self) -> npt.NDArray[np.int64]: """ values = self._data._local_timestamps() - reso = self._data._reso + reso = self._data._creso ppd = periods_per_day(reso) frac = values % ppd diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index dfe41896c2665..b27d90e43d860 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -71,7 +71,7 @@ def test_non_nano(self, unit, reso, dtype): dta = DatetimeArray._simple_new(arr, dtype=dtype) assert dta.dtype == dtype - assert dta[0]._reso == reso + assert dta[0]._creso == reso assert tz_compare(dta.tz, dta[0].tz) assert (dta[0] == dta[:1]).all() @@ -124,7 +124,7 @@ def test_std_non_nano(self, unit): # we should match the nano-reso std, but floored to our reso. res = dta.std() - assert res._reso == dta._reso + assert res._creso == dta._creso assert res == dti.std().floor(unit) @pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning") @@ -141,12 +141,12 @@ def test_iter(self, dta): assert type(res) is pd.Timestamp assert res.value == expected.value - assert res._reso == expected._reso + assert res._creso == expected._creso assert res == expected def test_astype_object(self, dta): result = dta.astype(object) - assert all(x._reso == dta._reso for x in result) + assert all(x._creso == dta._creso for x in result) assert all(x == y for x, y in zip(result, dta)) def test_to_pydatetime(self, dta_dti): @@ -240,7 +240,7 @@ def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar): dta, dti = dta_dti td = pd.Timedelta(scalar) - exp_reso = max(dta._reso, td._reso) + exp_reso = max(dta._creso, td._creso) exp_unit = npy_unit_to_abbrev(exp_reso) expected = (dti + td)._data._as_unit(exp_unit) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 6c48ee3b6405e..75dff66a91365 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -40,7 +40,7 @@ def test_non_nano(self, unit, reso): tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) assert tda.dtype == arr.dtype - assert tda[0]._reso == reso + assert tda[0]._creso == reso @pytest.mark.parametrize("field", TimedeltaArray._field_ops) def test_fields(self, tda, field): @@ -80,23 +80,23 @@ def test_timedelta_array_total_seconds(self): def test_add_nat_datetimelike_scalar(self, nat, tda): result = tda + nat assert isinstance(result, DatetimeArray) - assert result._reso == tda._reso + assert result._creso == tda._creso assert result.isna().all() result = nat + tda assert isinstance(result, DatetimeArray) - assert result._reso == tda._reso + assert result._creso == tda._creso assert result.isna().all() def test_add_pdnat(self, tda): result = tda + pd.NaT assert isinstance(result, TimedeltaArray) - assert result._reso == tda._reso + assert result._creso == tda._creso assert result.isna().all() result = pd.NaT + tda assert isinstance(result, TimedeltaArray) - assert result._reso == tda._reso + assert result._creso == tda._creso assert result.isna().all() # TODO: 2022-07-11 this is the only test that gets to DTA.tz_convert @@ -130,28 +130,28 @@ def test_mul_scalar(self, tda): result = tda * other expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) tm.assert_extension_array_equal(result, expected) - assert result._reso == tda._reso + assert result._creso == tda._creso def test_mul_listlike(self, tda): other = np.arange(len(tda)) result = tda * other expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) tm.assert_extension_array_equal(result, expected) - assert result._reso == tda._reso + assert result._creso == tda._creso def test_mul_listlike_object(self, tda): other = np.arange(len(tda)) result = tda * other.astype(object) expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) tm.assert_extension_array_equal(result, expected) - assert result._reso == tda._reso + assert result._creso == tda._creso def test_div_numeric_scalar(self, tda): other = 2 result = tda / other expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype) tm.assert_extension_array_equal(result, expected) - assert result._reso == tda._reso + assert result._creso == tda._creso def test_div_td_scalar(self, tda): other = timedelta(seconds=1) @@ -164,7 +164,7 @@ def test_div_numeric_array(self, tda): result = tda / other expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype) tm.assert_extension_array_equal(result, expected) - assert result._reso == tda._reso + assert result._creso == tda._creso def test_div_td_array(self, tda): other = tda._ndarray + tda._ndarray[-1] diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index aeae5fec481ec..7f6ec8b328c87 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -268,7 +268,7 @@ def test_hash_vs_equality(self, dtype): def test_construction_non_nanosecond(self): res = DatetimeTZDtype("ms", "US/Eastern") assert res.unit == "ms" - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.str == "|M8[ms]" assert str(res) == "datetime64[ms, US/Eastern]" diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 4e1d1c696b25c..7540813fd302b 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -45,12 +45,12 @@ def test_from_td64_retain_resolution(): td = Timedelta(obj) assert td.value == obj.view("i8") - assert td._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert td._creso == NpyDatetimeUnit.NPY_FR_ms.value # Case where we cast to nearest-supported reso obj2 = np.timedelta64(1234, "D") td2 = Timedelta(obj2) - assert td2._reso == NpyDatetimeUnit.NPY_FR_s.value + assert td2._creso == NpyDatetimeUnit.NPY_FR_s.value assert td2 == obj2 assert td2.days == 1234 @@ -58,7 +58,7 @@ def test_from_td64_retain_resolution(): obj3 = np.timedelta64(1000000000000000000, "us") td3 = Timedelta(obj3) assert td3.total_seconds() == 1000000000000 - assert td3._reso == NpyDatetimeUnit.NPY_FR_us.value + assert td3._creso == NpyDatetimeUnit.NPY_FR_us.value def test_from_pytimedelta_us_reso(): @@ -66,31 +66,31 @@ def test_from_pytimedelta_us_reso(): td = timedelta(days=4, minutes=3) result = Timedelta(td) assert result.to_pytimedelta() == td - assert result._reso == NpyDatetimeUnit.NPY_FR_us.value + assert result._creso == NpyDatetimeUnit.NPY_FR_us.value def test_from_tick_reso(): tick = offsets.Nano() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ns.value tick = offsets.Micro() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_us.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_us.value tick = offsets.Milli() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ms.value tick = offsets.Second() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value # everything above Second gets cast to the closest supported reso: second tick = offsets.Minute() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Hour() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Day() - assert Timedelta(tick)._reso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value def test_construction(): @@ -282,7 +282,7 @@ def test_overflow_on_construction(): # used to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) - assert td._reso == NpyDatetimeUnit.NPY_FR_us.value + assert td._creso == NpyDatetimeUnit.NPY_FR_us.value assert td.days == 13 * 19999 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 295222320020d..e7f97a7269aa3 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -34,27 +34,27 @@ def test_as_unit(self): res = td._as_unit("us") assert res.value == td.value // 1000 - assert res._reso == NpyDatetimeUnit.NPY_FR_us.value + assert res._creso == NpyDatetimeUnit.NPY_FR_us.value rt = res._as_unit("ns") assert rt.value == td.value - assert rt._reso == td._reso + assert rt._creso == td._creso res = td._as_unit("ms") assert res.value == td.value // 1_000_000 - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value rt = res._as_unit("ns") assert rt.value == td.value - assert rt._reso == td._reso + assert rt._creso == td._creso res = td._as_unit("s") assert res.value == td.value // 1_000_000_000 - assert res._reso == NpyDatetimeUnit.NPY_FR_s.value + assert res._creso == NpyDatetimeUnit.NPY_FR_s.value rt = res._as_unit("ns") assert rt.value == td.value - assert rt._reso == td._reso + assert rt._creso == td._creso def test_as_unit_overflows(self): # microsecond that would be just out of bounds for nano @@ -67,7 +67,7 @@ def test_as_unit_overflows(self): res = td._as_unit("ms") assert res.value == us // 1000 - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value def test_as_unit_rounding(self): td = Timedelta(microseconds=1500) @@ -76,7 +76,7 @@ def test_as_unit_rounding(self): expected = Timedelta(milliseconds=1) assert res == expected - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): @@ -131,26 +131,26 @@ def test_from_value_and_reso(self, unit, val): # Just checking that the fixture is giving us what we asked for td = Timedelta._from_value_and_reso(val, unit) assert td.value == val - assert td._reso == unit + assert td._creso == unit assert td.days == 106752 def test_unary_non_nano(self, td, unit): - assert abs(td)._reso == unit - assert (-td)._reso == unit - assert (+td)._reso == unit + assert abs(td)._creso == unit + assert (-td)._creso == unit + assert (+td)._creso == unit def test_sub_preserves_reso(self, td, unit): res = td - td expected = Timedelta._from_value_and_reso(0, unit) assert res == expected - assert res._reso == unit + assert res._creso == unit def test_mul_preserves_reso(self, td, unit): # The td fixture should always be far from the implementation # bound, so doubling does not risk overflow. res = td * 2 assert res.value == td.value * 2 - assert res._reso == unit + assert res._creso == unit def test_cmp_cross_reso(self, td): # numpy gets this wrong because of silent overflow @@ -208,11 +208,11 @@ def test_truediv_numeric(self, td): res = td / 2 assert res.value == td.value / 2 - assert res._reso == td._reso + assert res._creso == td._creso res = td / 2.0 assert res.value == td.value / 2 - assert res._reso == td._reso + assert res._creso == td._creso def test_floordiv_timedeltalike(self, td): assert td // td == 1 @@ -242,21 +242,21 @@ def test_floordiv_numeric(self, td): res = td // 2 assert res.value == td.value // 2 - assert res._reso == td._reso + assert res._creso == td._creso res = td // 2.0 assert res.value == td.value // 2 - assert res._reso == td._reso + assert res._creso == td._creso assert td // np.array(np.nan) is NaT res = td // np.array(2) assert res.value == td.value // 2 - assert res._reso == td._reso + assert res._creso == td._creso res = td // np.array(2.0) assert res.value == td.value // 2 - assert res._reso == td._reso + assert res._creso == td._creso def test_addsub_mismatched_reso(self, td): # need to cast to since td is out of bounds for ns, so @@ -265,19 +265,19 @@ def test_addsub_mismatched_reso(self, td): # td is out of bounds for ns result = td + other - assert result._reso == other._reso + assert result._creso == other._creso assert result.days == td.days + 1 result = other + td - assert result._reso == other._reso + assert result._creso == other._creso assert result.days == td.days + 1 result = td - other - assert result._reso == other._reso + assert result._creso == other._creso assert result.days == td.days - 1 result = other - td - assert result._reso == other._reso + assert result._creso == other._creso assert result.days == 1 - td.days other2 = Timedelta(500) @@ -293,32 +293,32 @@ def test_addsub_mismatched_reso(self, td): def test_min(self, td): assert td.min <= td - assert td.min._reso == td._reso + assert td.min._creso == td._creso assert td.min.value == NaT.value + 1 def test_max(self, td): assert td.max >= td - assert td.max._reso == td._reso + assert td.max._creso == td._creso assert td.max.value == np.iinfo(np.int64).max def test_resolution(self, td): - expected = Timedelta._from_value_and_reso(1, td._reso) + expected = Timedelta._from_value_and_reso(1, td._creso) result = td.resolution assert result == expected - assert result._reso == expected._reso + assert result._creso == expected._creso def test_timedelta_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timedelta.min == Timedelta(NaT.value + 1) - assert Timedelta.min._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.min._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timedelta.max == Timedelta(np.iinfo(np.int64).max) - assert Timedelta.max._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.max._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timedelta.resolution == Timedelta(1) - assert Timedelta.resolution._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value class TestTimedeltaUnaryOps: @@ -759,15 +759,15 @@ def test_round_non_nano(self, unit): res = td.round("min") assert res == Timedelta("1 days 02:35:00") - assert res._reso == td._reso + assert res._creso == td._creso res = td.floor("min") assert res == Timedelta("1 days 02:34:00") - assert res._reso == td._reso + assert res._creso == td._creso res = td.ceil("min") assert res == Timedelta("1 days 02:35:00") - assert res._reso == td._reso + assert res._creso == td._creso def test_identity(self): diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 9b7d8d82a9b98..757abbf3e662c 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -458,8 +458,8 @@ def test_out_of_bounds_value(self): # We used to raise on these before supporting non-nano us_val = NpyDatetimeUnit.NPY_FR_us.value - assert Timestamp(min_ts_us - one_us)._reso == us_val - assert Timestamp(max_ts_us + one_us)._reso == us_val + assert Timestamp(min_ts_us - one_us)._creso == us_val + assert Timestamp(max_ts_us + one_us)._creso == us_val # https://github.com/numpy/numpy/issues/22346 for why # we can't use the same construction as above with minute resolution @@ -506,7 +506,7 @@ def test_bounds_with_different_units(self): assert ts.value == dt64.view("i8") else: # we chose the closest unit that we _do_ support - assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value + assert ts._creso == NpyDatetimeUnit.NPY_FR_s.value # With more extreme cases, we can't even fit inside second resolution info = np.iinfo(np.int64) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 61663f774ced0..4f8c6fcc57186 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -716,17 +716,17 @@ def ts(self, dt64): @pytest.fixture def ts_tz(self, ts, tz_aware_fixture): tz = maybe_get_tz(tz_aware_fixture) - return Timestamp._from_value_and_reso(ts.value, ts._reso, tz) + return Timestamp._from_value_and_reso(ts.value, ts._creso, tz) def test_non_nano_construction(self, dt64, ts, reso): assert ts.value == dt64.view("i8") if reso == "s": - assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value + assert ts._creso == NpyDatetimeUnit.NPY_FR_s.value elif reso == "ms": - assert ts._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert ts._creso == NpyDatetimeUnit.NPY_FR_ms.value elif reso == "us": - assert ts._reso == NpyDatetimeUnit.NPY_FR_us.value + assert ts._creso == NpyDatetimeUnit.NPY_FR_us.value def test_non_nano_fields(self, dt64, ts): alt = Timestamp(dt64) @@ -771,13 +771,13 @@ def test_month_name(self, dt64, ts): assert ts.month_name() == alt.month_name() def test_tz_convert(self, ts): - ts = Timestamp._from_value_and_reso(ts.value, ts._reso, utc) + ts = Timestamp._from_value_and_reso(ts.value, ts._creso, utc) tz = pytz.timezone("US/Pacific") result = ts.tz_convert(tz) assert isinstance(result, Timestamp) - assert result._reso == ts._reso + assert result._creso == ts._creso assert tz_compare(result.tz, tz) def test_repr(self, dt64, ts): @@ -845,15 +845,15 @@ def test_cmp_cross_reso_reversed_dt64(self): def test_pickle(self, ts, tz_aware_fixture): tz = tz_aware_fixture tz = maybe_get_tz(tz) - ts = Timestamp._from_value_and_reso(ts.value, ts._reso, tz) + ts = Timestamp._from_value_and_reso(ts.value, ts._creso, tz) rt = tm.round_trip_pickle(ts) - assert rt._reso == ts._reso + assert rt._creso == ts._creso assert rt == ts def test_normalize(self, dt64, ts): alt = Timestamp(dt64) result = ts.normalize() - assert result._reso == ts._reso + assert result._creso == ts._creso assert result == alt.normalize() def test_asm8(self, dt64, ts): @@ -884,24 +884,24 @@ def test_to_period(self, dt64, ts): ) def test_addsub_timedeltalike_non_nano(self, dt64, ts, td): - exp_reso = max(ts._reso, Timedelta(td)._reso) + exp_reso = max(ts._creso, Timedelta(td)._creso) result = ts - td expected = Timestamp(dt64) - td assert isinstance(result, Timestamp) - assert result._reso == exp_reso + assert result._creso == exp_reso assert result == expected result = ts + td expected = Timestamp(dt64) + td assert isinstance(result, Timestamp) - assert result._reso == exp_reso + assert result._creso == exp_reso assert result == expected result = td + ts expected = td + Timestamp(dt64) assert isinstance(result, Timestamp) - assert result._reso == exp_reso + assert result._creso == exp_reso assert result == expected def test_addsub_offset(self, ts_tz): @@ -910,7 +910,7 @@ def test_addsub_offset(self, ts_tz): result = ts_tz + off assert isinstance(result, Timestamp) - assert result._reso == ts_tz._reso + assert result._creso == ts_tz._creso if ts_tz.month == 12 and ts_tz.day == 31: assert result.year == ts_tz.year + 1 else: @@ -922,7 +922,7 @@ def test_addsub_offset(self, ts_tz): result = ts_tz - off assert isinstance(result, Timestamp) - assert result._reso == ts_tz._reso + assert result._creso == ts_tz._creso assert result.year == ts_tz.year - 1 assert result.day == 31 assert result.month == 12 @@ -933,101 +933,101 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): ts = ts_tz # choose a unit for `other` that doesn't match ts_tz's; - # this construction ensures we get cases with other._reso < ts._reso - # and cases with other._reso > ts._reso + # this construction ensures we get cases with other._creso < ts._creso + # and cases with other._creso > ts._creso unit = { NpyDatetimeUnit.NPY_FR_us.value: "ms", NpyDatetimeUnit.NPY_FR_ms.value: "s", NpyDatetimeUnit.NPY_FR_s.value: "us", - }[ts._reso] + }[ts._creso] other = ts._as_unit(unit) - assert other._reso != ts._reso + assert other._creso != ts._creso result = ts - other assert isinstance(result, Timedelta) assert result.value == 0 - assert result._reso == max(ts._reso, other._reso) + assert result._creso == max(ts._creso, other._creso) result = other - ts assert isinstance(result, Timedelta) assert result.value == 0 - assert result._reso == max(ts._reso, other._reso) + assert result._creso == max(ts._creso, other._creso) - if ts._reso < other._reso: + if ts._creso < other._creso: # Case where rounding is lossy - other2 = other + Timedelta._from_value_and_reso(1, other._reso) + other2 = other + Timedelta._from_value_and_reso(1, other._creso) exp = ts._as_unit(other._unit) - other2 res = ts - other2 assert res == exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) res = other2 - ts assert res == -exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) else: - ts2 = ts + Timedelta._from_value_and_reso(1, ts._reso) + ts2 = ts + Timedelta._from_value_and_reso(1, ts._creso) exp = ts2 - other._as_unit(ts2._unit) res = ts2 - other assert res == exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) res = other - ts2 assert res == -exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) def test_sub_timedeltalike_mismatched_reso(self, ts_tz): # case with non-lossy rounding ts = ts_tz # choose a unit for `other` that doesn't match ts_tz's; - # this construction ensures we get cases with other._reso < ts._reso - # and cases with other._reso > ts._reso + # this construction ensures we get cases with other._creso < ts._creso + # and cases with other._creso > ts._creso unit = { NpyDatetimeUnit.NPY_FR_us.value: "ms", NpyDatetimeUnit.NPY_FR_ms.value: "s", NpyDatetimeUnit.NPY_FR_s.value: "us", - }[ts._reso] + }[ts._creso] other = Timedelta(0)._as_unit(unit) - assert other._reso != ts._reso + assert other._creso != ts._creso result = ts + other assert isinstance(result, Timestamp) assert result == ts - assert result._reso == max(ts._reso, other._reso) + assert result._creso == max(ts._creso, other._creso) result = other + ts assert isinstance(result, Timestamp) assert result == ts - assert result._reso == max(ts._reso, other._reso) + assert result._creso == max(ts._creso, other._creso) - if ts._reso < other._reso: + if ts._creso < other._creso: # Case where rounding is lossy - other2 = other + Timedelta._from_value_and_reso(1, other._reso) + other2 = other + Timedelta._from_value_and_reso(1, other._creso) exp = ts._as_unit(other._unit) + other2 res = ts + other2 assert res == exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) res = other2 + ts assert res == exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) else: - ts2 = ts + Timedelta._from_value_and_reso(1, ts._reso) + ts2 = ts + Timedelta._from_value_and_reso(1, ts._creso) exp = ts2 + other._as_unit(ts2._unit) res = ts2 + other assert res == exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) res = other + ts2 assert res == exp - assert res._reso == max(ts._reso, other._reso) + assert res._creso == max(ts._creso, other._creso) def test_addition_doesnt_downcast_reso(self): # https://github.com/pandas-dev/pandas/pull/48748#pullrequestreview-1122635413 ts = Timestamp(year=2022, month=1, day=1, microsecond=999999)._as_unit("us") td = Timedelta(microseconds=1)._as_unit("us") res = ts + td - assert res._reso == ts._reso + assert res._creso == ts._creso def test_sub_timedelta64_mismatched_reso(self, ts_tz): ts = ts_tz @@ -1035,36 +1035,36 @@ def test_sub_timedelta64_mismatched_reso(self, ts_tz): res = ts + np.timedelta64(1, "ns") exp = ts._as_unit("ns") + np.timedelta64(1, "ns") assert exp == res - assert exp._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert exp._creso == NpyDatetimeUnit.NPY_FR_ns.value def test_min(self, ts): assert ts.min <= ts - assert ts.min._reso == ts._reso + assert ts.min._creso == ts._creso assert ts.min.value == NaT.value + 1 def test_max(self, ts): assert ts.max >= ts - assert ts.max._reso == ts._reso + assert ts.max._creso == ts._creso assert ts.max.value == np.iinfo(np.int64).max def test_resolution(self, ts): - expected = Timedelta._from_value_and_reso(1, ts._reso) + expected = Timedelta._from_value_and_reso(1, ts._creso) result = ts.resolution assert result == expected - assert result._reso == expected._reso + assert result._creso == expected._creso def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timestamp.min == Timestamp(NaT.value + 1) - assert Timestamp.min._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.min._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timestamp.max == Timestamp(np.iinfo(np.int64).max) - assert Timestamp.max._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.max._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timestamp.resolution == Timedelta(1) - assert Timestamp.resolution._reso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value class TestAsUnit: @@ -1075,27 +1075,27 @@ def test_as_unit(self): res = ts._as_unit("us") assert res.value == ts.value // 1000 - assert res._reso == NpyDatetimeUnit.NPY_FR_us.value + assert res._creso == NpyDatetimeUnit.NPY_FR_us.value rt = res._as_unit("ns") assert rt.value == ts.value - assert rt._reso == ts._reso + assert rt._creso == ts._creso res = ts._as_unit("ms") assert res.value == ts.value // 1_000_000 - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value rt = res._as_unit("ns") assert rt.value == ts.value - assert rt._reso == ts._reso + assert rt._creso == ts._creso res = ts._as_unit("s") assert res.value == ts.value // 1_000_000_000 - assert res._reso == NpyDatetimeUnit.NPY_FR_s.value + assert res._creso == NpyDatetimeUnit.NPY_FR_s.value rt = res._as_unit("ns") assert rt.value == ts.value - assert rt._reso == ts._reso + assert rt._creso == ts._creso def test_as_unit_overflows(self): # microsecond that would be just out of bounds for nano @@ -1108,7 +1108,7 @@ def test_as_unit_overflows(self): res = ts._as_unit("ms") assert res.value == us // 1000 - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value def test_as_unit_rounding(self): ts = Timestamp(1_500_000) # i.e. 1500 microseconds @@ -1117,7 +1117,7 @@ def test_as_unit_rounding(self): expected = Timestamp(1_000_000) # i.e. 1 millisecond assert res == expected - assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 874575fa9ad4c..a05da73ac3031 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -72,11 +72,11 @@ def test_tz_localize_ambiguous_bool(self, unit): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -270,7 +270,7 @@ def test_timestamp_tz_localize_nonexistent_shift( assert result == expected.replace(microsecond=0, nanosecond=0) else: assert result == expected - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("offset", [-1, 1]) @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 9c376c7a13efc..6d9cfa51d2210 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -154,7 +154,7 @@ def test_ceil(self, unit): result = dt.ceil("D") expected = Timestamp("20130102") assert result == expected - assert result._reso == dt._reso + assert result._creso == dt._creso @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) def test_floor(self, unit): @@ -162,7 +162,7 @@ def test_floor(self, unit): result = dt.floor("D") expected = Timestamp("20130101") assert result == expected - assert result._reso == dt._reso + assert result._creso == dt._creso @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) @pytest.mark.parametrize( @@ -176,14 +176,14 @@ def test_round_dst_border_ambiguous(self, method, unit): # result = getattr(ts, method)("H", ambiguous=True) assert result == ts - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous=False) expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -210,7 +210,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -363,7 +363,7 @@ def test_replace_non_nano(self): assert ts.to_pydatetime() == datetime(4869, 12, 28) result = ts.replace(year=4900) - assert result._reso == ts._reso + assert result._creso == ts._creso assert result.to_pydatetime() == datetime(4900, 12, 28) def test_replace_naive(self): @@ -490,7 +490,7 @@ def test_replace_dst_border(self, unit): result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @@ -504,7 +504,7 @@ def test_replace_dst_fold(self, fold, tz, unit): tz, ambiguous=not fold ) assert result == expected - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value # -------------------------------------------------------------- # Timestamp.normalize @@ -517,7 +517,7 @@ def test_normalize(self, tz_naive_fixture, arg, unit): result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected - assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_normalize_pre_epoch_dates(self): # GH: 36294 diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4dd1b32ba65e4..2b5457fc9f7b3 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -698,7 +698,7 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): # as of 2022-09-28, the Timestamp constructor has been updated # to cast to M8[s] but to_datetime has not ts = Timestamp(dt) - assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value + assert ts._creso == NpyDatetimeUnit.NPY_FR_s.value assert ts.asm8 == dt msg = "Out of bounds nanosecond timestamp" diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 34266e55d9ea9..0862fe430e430 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -575,7 +575,7 @@ def test_add_dt64_ndarray_non_nano(self, offset_types, unit, request): result = dta + off exp_unit = unit - if isinstance(off, Tick) and off._reso > dta._reso: + if isinstance(off, Tick) and off._creso > dta._creso: # cast to higher reso like we would with Timedelta scalar exp_unit = Timedelta(off)._unit expected = expected._as_unit(exp_unit) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e74ab61d3881d..97dcd0b011b62 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -217,12 +217,12 @@ def __init__(self, index, warn: bool = True) -> None: if isinstance(index, ABCIndex): # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray, # ndarray[Any, Any]]" has no attribute "_ndarray" - self._reso = get_unit_from_dtype( + self._creso = get_unit_from_dtype( index._data._ndarray.dtype # type: ignore[union-attr] ) else: # otherwise we have DTA/TDA - self._reso = get_unit_from_dtype(index._ndarray.dtype) + self._creso = get_unit_from_dtype(index._ndarray.dtype) # This moves the values, which are implicitly in UTC, to the # the timezone so they are in local time @@ -277,7 +277,7 @@ def get_freq(self) -> str | None: return None delta = self.deltas[0] - ppd = periods_per_day(self._reso) + ppd = periods_per_day(self._creso) if delta and _is_multiple(delta, ppd): return self._infer_daily_rule() @@ -316,17 +316,17 @@ def get_freq(self) -> str | None: @cache_readonly def day_deltas(self) -> list[int]: - ppd = periods_per_day(self._reso) + ppd = periods_per_day(self._creso) return [x / ppd for x in self.deltas] @cache_readonly def hour_deltas(self) -> list[int]: - pph = periods_per_day(self._reso) // 24 + pph = periods_per_day(self._creso) // 24 return [x / pph for x in self.deltas] @cache_readonly def fields(self) -> np.ndarray: # structured array of fields - return build_field_sarray(self.i8values, reso=self._reso) + return build_field_sarray(self.i8values, reso=self._creso) @cache_readonly def rep_stamp(self) -> Timestamp: @@ -377,7 +377,7 @@ def _infer_daily_rule(self) -> str | None: return None def _get_daily_rule(self) -> str | None: - ppd = periods_per_day(self._reso) + ppd = periods_per_day(self._creso) days = self.deltas[0] / ppd if days % 7 == 0: # Weekly @@ -433,7 +433,7 @@ def _is_business_daily(self) -> bool: # probably business daily, but need to confirm first_weekday = self.index[0].weekday() shifts = np.diff(self.index.asi8) - ppd = periods_per_day(self._reso) + ppd = periods_per_day(self._creso) shifts = np.floor_divide(shifts, ppd) weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) From 41c2c1272753c68c835358c84dcb3ee44ea38755 Mon Sep 17 00:00:00 2001 From: H L Date: Sun, 16 Oct 2022 04:59:44 -0700 Subject: [PATCH 3/8] DOC: Fixed Issue: Typo of DataFrame.iat() in 10 minutes to panda (#49122) --- doc/source/user_guide/10min.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 06508e9af9660..6fc53fe09d791 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -233,7 +233,7 @@ For getting fast access to a scalar (equivalent to the prior method): Selection by position ~~~~~~~~~~~~~~~~~~~~~ -See more in :ref:`Selection by Position ` using :meth:`DataFrame.iloc` or :meth:`DataFrame.at`. +See more in :ref:`Selection by Position ` using :meth:`DataFrame.iloc` or :meth:`DataFrame.iat`. Select via the position of the passed integers: From de6a5065a165cee75c2247cf1187994d4d38728a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 16 Oct 2022 05:01:31 -0700 Subject: [PATCH 4/8] REF: reso->creso (#49123) --- pandas/_libs/index.pyx | 12 ++-- pandas/_libs/tslibs/timestamps.pyx | 6 +- pandas/_libs/tslibs/tzconversion.pxd | 4 +- pandas/_libs/tslibs/tzconversion.pyi | 4 +- pandas/_libs/tslibs/tzconversion.pyx | 82 ++++++++++++++-------------- pandas/_libs/tslibs/vectorized.pyx | 12 ++-- pandas/core/arrays/datetimes.py | 2 +- 7 files changed, 61 insertions(+), 61 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index cc0174f795ebe..f968e879498b2 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -490,11 +490,11 @@ cdef class ObjectEngine(IndexEngine): cdef class DatetimeEngine(Int64Engine): cdef: - NPY_DATETIMEUNIT reso + NPY_DATETIMEUNIT _creso def __init__(self, ndarray values): super().__init__(values.view("i8")) - self.reso = get_unit_from_dtype(values.dtype) + self._creso = get_unit_from_dtype(values.dtype) cdef int64_t _unbox_scalar(self, scalar) except? -1: # NB: caller is responsible for ensuring tzawareness compat @@ -502,12 +502,12 @@ cdef class DatetimeEngine(Int64Engine): if scalar is NaT: return NaT.value elif isinstance(scalar, _Timestamp): - if scalar._creso == self.reso: + if scalar._creso == self._creso: return scalar.value else: # Note: caller is responsible for catching potential ValueError # from _as_creso - return (<_Timestamp>scalar)._as_creso(self.reso, round_ok=False).value + return (<_Timestamp>scalar)._as_creso(self._creso, round_ok=False).value raise TypeError(scalar) def __contains__(self, val: object) -> bool: @@ -570,12 +570,12 @@ cdef class TimedeltaEngine(DatetimeEngine): if scalar is NaT: return NaT.value elif isinstance(scalar, _Timedelta): - if scalar._creso == self.reso: + if scalar._creso == self._creso: return scalar.value else: # Note: caller is responsible for catching potential ValueError # from _as_creso - return (<_Timedelta>scalar)._as_creso(self.reso, round_ok=False).value + return (<_Timedelta>scalar)._as_creso(self._creso, round_ok=False).value raise TypeError(scalar) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 30ead1d4e3142..2e7b336e3536a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2099,10 +2099,10 @@ default 'raise' value = tz_localize_to_utc_single(self.value, tz, ambiguous=ambiguous, nonexistent=nonexistent, - reso=self._creso) + creso=self._creso) elif tz is None: # reset tz - value = tz_convert_from_utc_single(self.value, self.tz, reso=self._creso) + value = tz_convert_from_utc_single(self.value, self.tz, creso=self._creso) else: raise TypeError( @@ -2245,7 +2245,7 @@ default 'raise' fold = self.fold if tzobj is not None: - value = tz_convert_from_utc_single(value, tzobj, reso=self._creso) + value = tz_convert_from_utc_single(value, tzobj, creso=self._creso) # setup components pandas_datetime_to_datetimestruct(value, self._creso, &dts) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 3a6a6f4e10035..1b95899e5c037 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -9,14 +9,14 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT cpdef int64_t tz_convert_from_utc_single( - int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=* + int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT creso=* ) except? -1 cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*, - NPY_DATETIMEUNIT reso=*, + NPY_DATETIMEUNIT creso=*, ) except? -1 diff --git a/pandas/_libs/tslibs/tzconversion.pyi b/pandas/_libs/tslibs/tzconversion.pyi index fab73f96b0dfb..a354765a348ec 100644 --- a/pandas/_libs/tslibs/tzconversion.pyi +++ b/pandas/_libs/tslibs/tzconversion.pyi @@ -10,12 +10,12 @@ from pandas._typing import npt # tz_convert_from_utc_single exposed for testing def tz_convert_from_utc_single( - val: np.int64, tz: tzinfo, reso: int = ... + val: np.int64, tz: tzinfo, creso: int = ... ) -> np.int64: ... def tz_localize_to_utc( vals: npt.NDArray[np.int64], tz: tzinfo | None, ambiguous: str | bool | Iterable[bool] | None = ..., nonexistent: str | timedelta | np.timedelta64 | None = ..., - reso: int = ..., # NPY_DATETIMEUNIT + creso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 953ba10993973..e2812178a2b43 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -54,7 +54,7 @@ cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64) cdef class Localizer: # cdef: # tzinfo tz - # NPY_DATETIMEUNIT _reso + # NPY_DATETIMEUNIT _creso # bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz # ndarray trans # Py_ssize_t ntrans @@ -64,9 +64,9 @@ cdef class Localizer: @cython.initializedcheck(False) @cython.boundscheck(False) - def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso): + def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT creso): self.tz = tz - self._creso = reso + self._creso = creso self.use_utc = self.use_tzlocal = self.use_fixed = False self.use_dst = self.use_pytz = False self.ntrans = -1 # placeholder @@ -82,22 +82,22 @@ cdef class Localizer: else: trans, deltas, typ = get_dst_info(tz) - if reso != NPY_DATETIMEUNIT.NPY_FR_ns: + if creso != NPY_DATETIMEUNIT.NPY_FR_ns: # NB: using floordiv here is implicitly assuming we will # never see trans or deltas that are not an integer number # of seconds. # TODO: avoid these np.array calls - if reso == NPY_DATETIMEUNIT.NPY_FR_us: + if creso == NPY_DATETIMEUNIT.NPY_FR_us: trans = np.array(trans) // 1_000 deltas = np.array(deltas) // 1_000 - elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + elif creso == NPY_DATETIMEUNIT.NPY_FR_ms: trans = np.array(trans) // 1_000_000 deltas = np.array(deltas) // 1_000_000 - elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + elif creso == NPY_DATETIMEUNIT.NPY_FR_s: trans = np.array(trans) // 1_000_000_000 deltas = np.array(deltas) // 1_000_000_000 else: - raise NotImplementedError(reso) + raise NotImplementedError(creso) self.trans = trans self.ntrans = self.trans.shape[0] @@ -121,7 +121,7 @@ cdef class Localizer: return utc_val elif self.use_tzlocal: return utc_val + _tz_localize_using_tzinfo_api( - utc_val, self.tz, to_utc=False, reso=self._creso, fold=fold + utc_val, self.tz, to_utc=False, creso=self._creso, fold=fold ) elif self.use_fixed: return utc_val + self.delta @@ -140,7 +140,7 @@ cdef int64_t tz_localize_to_utc_single( tzinfo tz, object ambiguous=None, object nonexistent=None, - NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns, ) except? -1: """See tz_localize_to_utc.__doc__""" cdef: @@ -155,18 +155,18 @@ cdef int64_t tz_localize_to_utc_single( return val elif is_tzlocal(tz) or is_zoneinfo(tz): - return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, reso=reso) + return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, creso=creso) elif is_fixed_offset(tz): _, deltas, _ = get_dst_info(tz) delta = deltas[0] # TODO: de-duplicate with Localizer.__init__ - if reso != NPY_DATETIMEUNIT.NPY_FR_ns: - if reso == NPY_DATETIMEUNIT.NPY_FR_us: + if creso != NPY_DATETIMEUNIT.NPY_FR_ns: + if creso == NPY_DATETIMEUNIT.NPY_FR_us: delta = delta // 1000 - elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + elif creso == NPY_DATETIMEUNIT.NPY_FR_ms: delta = delta // 1_000_000 - elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + elif creso == NPY_DATETIMEUNIT.NPY_FR_s: delta = delta // 1_000_000_000 return val - delta @@ -177,7 +177,7 @@ cdef int64_t tz_localize_to_utc_single( tz, ambiguous=ambiguous, nonexistent=nonexistent, - reso=reso, + creso=creso, )[0] @@ -188,7 +188,7 @@ def tz_localize_to_utc( tzinfo tz, object ambiguous=None, object nonexistent=None, - NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ Localize tzinfo-naive i8 to given time zone (using pytz). If @@ -216,7 +216,7 @@ def tz_localize_to_utc( nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \ timedelta-like} How to handle non-existent times when converting wall times to UTC - reso : NPY_DATETIMEUNIT, default NPY_FR_ns + creso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -236,8 +236,8 @@ timedelta-like} bint shift_forward = False, shift_backward = False bint fill_nonexist = False str stamp - Localizer info = Localizer(tz, reso=reso) - int64_t pph = periods_per_day(reso) // 24 + Localizer info = Localizer(tz, creso=creso) + int64_t pph = periods_per_day(creso) // 24 # Vectorized version of DstTzInfo.localize if info.use_utc: @@ -252,7 +252,7 @@ timedelta-like} result[i] = NPY_NAT else: result[i] = v - _tz_localize_using_tzinfo_api( - v, tz, to_utc=True, reso=reso + v, tz, to_utc=True, creso=creso ) return result.base # to return underlying ndarray @@ -294,7 +294,7 @@ timedelta-like} shift_backward = True elif PyDelta_Check(nonexistent): from .timedeltas import delta_to_nanoseconds - shift_delta = delta_to_nanoseconds(nonexistent, reso=reso) + shift_delta = delta_to_nanoseconds(nonexistent, reso=creso) elif nonexistent not in ('raise', None): msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " "shift_backwards} or a timedelta object") @@ -303,13 +303,13 @@ timedelta-like} # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) result_a, result_b =_get_utc_bounds( - vals, info.tdata, info.ntrans, info.deltas, reso=reso + vals, info.tdata, info.ntrans, info.deltas, creso=creso ) # silence false-positive compiler warning dst_hours = np.empty(0, dtype=np.int64) if infer_dst: - dst_hours = _get_dst_hours(vals, result_a, result_b, reso=reso) + dst_hours = _get_dst_hours(vals, result_a, result_b, creso=creso) # Pre-compute delta_idx_offset that will be used if we go down non-existent # paths. @@ -348,7 +348,7 @@ timedelta-like} # TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous result[i] = NPY_NAT else: - stamp = _render_tstamp(val, reso=reso) + stamp = _render_tstamp(val, creso=creso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp}, try using the " "'ambiguous' argument" @@ -386,7 +386,7 @@ timedelta-like} elif fill_nonexist: result[i] = NPY_NAT else: - stamp = _render_tstamp(val, reso=reso) + stamp = _render_tstamp(val, creso=creso) raise pytz.NonExistentTimeError(stamp) return result.base # .base to get underlying ndarray @@ -422,10 +422,10 @@ cdef inline Py_ssize_t bisect_right_i8(int64_t *data, return left -cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso): +cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT creso): """ Helper function to render exception messages""" from pandas._libs.tslibs.timestamps import Timestamp - ts = Timestamp._from_value_and_reso(val, reso, None) + ts = Timestamp._from_value_and_reso(val, creso, None) return str(ts) @@ -434,7 +434,7 @@ cdef _get_utc_bounds( int64_t* tdata, Py_ssize_t ntrans, const int64_t[::1] deltas, - NPY_DATETIMEUNIT reso, + NPY_DATETIMEUNIT creso, ): # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) @@ -444,7 +444,7 @@ cdef _get_utc_bounds( Py_ssize_t i, n = vals.size int64_t val, v_left, v_right Py_ssize_t isl, isr, pos_left, pos_right - int64_t ppd = periods_per_day(reso) + int64_t ppd = periods_per_day(creso) result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) @@ -486,11 +486,11 @@ cdef _get_utc_bounds( @cython.boundscheck(False) cdef ndarray[int64_t] _get_dst_hours( - # vals, reso only needed here to potential render an exception message + # vals, creso only needed here to potential render an exception message const int64_t[:] vals, ndarray[int64_t] result_a, ndarray[int64_t] result_b, - NPY_DATETIMEUNIT reso, + NPY_DATETIMEUNIT creso, ): cdef: Py_ssize_t i, n = vals.shape[0] @@ -519,7 +519,7 @@ cdef ndarray[int64_t] _get_dst_hours( if trans_idx.size == 1: # see test_tz_localize_to_utc_ambiguous_infer - stamp = _render_tstamp(vals[trans_idx[0]], reso=reso) + stamp = _render_tstamp(vals[trans_idx[0]], creso=creso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp} as there " "are no repeated times" @@ -541,7 +541,7 @@ cdef ndarray[int64_t] _get_dst_hours( delta = np.diff(result_a[grp]) if grp.size == 1 or np.all(delta > 0): # see test_tz_localize_to_utc_ambiguous_infer - stamp = _render_tstamp(vals[grp[0]], reso=reso) + stamp = _render_tstamp(vals[grp[0]], creso=creso) raise pytz.AmbiguousTimeError(stamp) # Find the index for the switch and pull from a for dst and b @@ -567,7 +567,7 @@ cdef ndarray[int64_t] _get_dst_hours( # Timezone Conversion cpdef int64_t tz_convert_from_utc_single( - int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns + int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns ) except? -1: """ Convert the val (in i8) from UTC to tz @@ -578,14 +578,14 @@ cpdef int64_t tz_convert_from_utc_single( ---------- utc_val : int64 tz : tzinfo - reso : NPY_DATETIMEUNIT, default NPY_FR_ns + creso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- converted: int64 """ cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=creso) Py_ssize_t pos # Note: caller is responsible for ensuring utc_val != NPY_NAT @@ -598,7 +598,7 @@ cdef int64_t _tz_localize_using_tzinfo_api( int64_t val, tzinfo tz, bint to_utc=True, - NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns, bint* fold=NULL, ) except? -1: """ @@ -613,7 +613,7 @@ cdef int64_t _tz_localize_using_tzinfo_api( tz : tzinfo to_utc : bint True if converting _to_ UTC, False if going the other direction. - reso : NPY_DATETIMEUNIT + creso : NPY_DATETIMEUNIT fold : bint*, default NULL pointer to fold: whether datetime ends up in a fold or not after adjustment. @@ -633,9 +633,9 @@ cdef int64_t _tz_localize_using_tzinfo_api( datetime dt int64_t delta timedelta td - int64_t pps = periods_per_second(reso) + int64_t pps = periods_per_second(creso) - pandas_datetime_to_datetimestruct(val, reso, &dts) + pandas_datetime_to_datetimestruct(val, creso, &dts) # datetime_new is cython-optimized constructor if not to_utc: diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index c1784c53a7857..6a6b156af3dc4 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -56,7 +56,7 @@ def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ ndarray[int64] """ cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=reso) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.size @@ -131,7 +131,7 @@ def ints_to_pydatetime( ndarray[object] of type specified by box """ cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=reso) int64_t utc_val, local_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -234,7 +234,7 @@ def get_resolution( ) -> Resolution: # stamps is int64_t, any ndim cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=reso) int64_t utc_val, local_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -286,7 +286,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNI result : int64 ndarray of converted of normalized nanosecond timestamps """ cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=reso) int64_t utc_val, local_val, res_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -333,7 +333,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) - is_normalized : bool True if all stamps are normalized """ cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=reso) int64_t utc_val, local_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -364,7 +364,7 @@ def dt64arr_to_periodarr( ): # stamps is int64_t, arbitrary ndim cdef: - Localizer info = Localizer(tz, reso=reso) + Localizer info = Localizer(tz, creso=reso) Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning int64_t utc_val, local_val, res_val diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 9ecd9473c903b..3337fd5f3cbd6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1021,7 +1021,7 @@ def tz_localize( tz, ambiguous=ambiguous, nonexistent=nonexistent, - reso=self._creso, + creso=self._creso, ) new_dates = new_dates.view(f"M8[{self._unit}]") dtype = tz_to_dtype(tz, unit=self._unit) From 73b85f0d286bfb45f74017053d09f9e2bfd05ea2 Mon Sep 17 00:00:00 2001 From: Matheus Pedroni <34845106+mathpn@users.noreply.github.com> Date: Sun, 16 Oct 2022 09:03:09 -0300 Subject: [PATCH 5/8] DOC: fix versionchanged blank line usage (#49117) (#49125) --- pandas/plotting/_core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 5bfc8c6b5b371..e340ea31deef4 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -692,14 +692,17 @@ class PlotAccessor(PandasObject): The matplotlib line style per column. logx : bool or 'sym', default False Use log scaling or symlog scaling on x axis. + .. versionchanged:: 0.25.0 logy : bool or 'sym' default False Use log scaling or symlog scaling on y axis. + .. versionchanged:: 0.25.0 loglog : bool or 'sym', default False Use log scaling or symlog scaling on both x and y axes. + .. versionchanged:: 0.25.0 xticks : sequence From 9049179ab5b9dce09f87f67f70f0fa8320dc59f9 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 16 Oct 2022 13:05:25 +0100 Subject: [PATCH 6/8] BUG: _guess_datetime_format_for_array doesn't guess if first element is '' or 'NaT' (#49120) --- pandas/_libs/tslib.pyi | 1 + pandas/_libs/tslib.pyx | 17 +++++++++++++++++ pandas/core/tools/datetimes.py | 5 ++--- pandas/tests/tools/test_to_datetime.py | 8 +++++--- 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 2212f8db8ea1e..8fec9ecf27f30 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -16,6 +16,7 @@ def array_with_unit_to_datetime( unit: str, errors: str = ..., ) -> tuple[np.ndarray, tzinfo | None]: ... +def first_non_null(values: np.ndarray) -> int: ... def array_to_datetime( values: npt.NDArray[np.object_], errors: str = ..., diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a1271ef0d897e..03331f54db892 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -421,6 +421,23 @@ def array_with_unit_to_datetime( return oresult, tz +@cython.wraparound(False) +@cython.boundscheck(False) +def first_non_null(values: ndarray) -> int: + """Find position of first non-null value, return -1 if there isn't one.""" + cdef: + Py_ssize_t n = len(values) + Py_ssize_t i + int result + for i in range(n): + val = values[i] + if checknull_with_nat_and_na(val): + continue + if isinstance(val, str) and (len(val) == 0 or val in nat_strings): + continue + return i + else: + return -1 @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 8566468d4e23f..7791ea804a52a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -126,9 +126,8 @@ class FulldatetimeDict(YearMonthDayDict, total=False): def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None: # Try to guess the format based on the first non-NaN element, return None if can't - non_nan_elements = notna(arr).nonzero()[0] - if len(non_nan_elements): - if type(first_non_nan_element := arr[non_nan_elements[0]]) is str: + if (first_non_null := tslib.first_non_null(arr)) != -1: + if type(first_non_nan_element := arr[first_non_null]) is str: # GH#32264 np.str_ object return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst) return None diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 2b5457fc9f7b3..263f2b597947a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2093,9 +2093,8 @@ def test_to_datetime_dta_tz(self, klass): class TestGuessDatetimeFormat: - @td.skip_if_not_us_locale @pytest.mark.parametrize( - "test_array", + "test_list", [ [ "2011-12-30 00:00:00.000000", @@ -2103,11 +2102,14 @@ class TestGuessDatetimeFormat: "2011-12-30 00:00:00.000000", ], [np.nan, np.nan, "2011-12-30 00:00:00.000000"], + ["", "2011-12-30 00:00:00.000000"], + ["NaT", "2011-12-30 00:00:00.000000"], ["2011-12-30 00:00:00.000000", "random_string"], ], ) - def test_guess_datetime_format_for_array(self, test_array): + def test_guess_datetime_format_for_array(self, test_list): expected_format = "%Y-%m-%d %H:%M:%S.%f" + test_array = np.array(test_list, dtype=object) assert tools._guess_datetime_format_for_array(test_array) == expected_format @td.skip_if_not_us_locale From 28c4629bc2f44d093fa6e8f110391f915c775fdf Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 16 Oct 2022 13:53:11 +0100 Subject: [PATCH 7/8] BUG: guess_datetime_format doesn't guess just year (#49127) * guess %Y format * fixup Co-authored-by: MarcoGorelli <> --- pandas/_libs/tslibs/parsing.pyx | 8 ++++++-- pandas/tests/tslibs/test_parsing.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index e0fcc829ad326..5c93edfee79f2 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1053,8 +1053,12 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: found_attrs.update(attrs) break - # Only consider it a valid guess if we have a year, month and day - if len({'year', 'month', 'day'} & found_attrs) != 3: + # Only consider it a valid guess if we have a year, month and day, + # unless it's %Y which is both common and unambiguous. + if ( + len({'year', 'month', 'day'} & found_attrs) != 3 + and format_guess != ['%Y'] + ): return None output_format = [] diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 9588f54388d1e..fcfca5a27763b 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -147,6 +147,7 @@ def test_parsers_month_freq(date_str, expected): [ ("20111230", "%Y%m%d"), ("2011-12-30", "%Y-%m-%d"), + ("2011", "%Y"), ("30-12-2011", "%d-%m-%Y"), ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"), ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), @@ -208,7 +209,6 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): @pytest.mark.parametrize( "invalid_dt", [ - "2013", "01/2013", "12:00:00", "1/1/1/1", From 9f915d7aa6303eac65934607ba43cf20a2899e5b Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Sun, 16 Oct 2022 23:31:52 -0400 Subject: [PATCH 8/8] added ArrowJsonParser and tests --- pandas/io/json/_json.py | 90 ++++++++++-------- pandas/io/json/arrow_json_parser_wrapper.py | 100 ++++++++++++++++++++ pandas/tests/io/json/conftest.py | 8 ++ pandas/tests/io/json/test_readlines.py | 32 +++++++ 4 files changed, 192 insertions(+), 38 deletions(-) create mode 100644 pandas/io/json/arrow_json_parser_wrapper.py diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index a0491f327cd63..d8783ee6d1169 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -73,8 +73,7 @@ build_table_schema, parse_table_schema, ) -from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper -from pandas.io.parsers.base_parser import ParserBase +from pandas.io.json.arrow_json_parser_wrapper import ArrowJsonParserWrapper from pandas.io.parsers.readers import validate_integer if TYPE_CHECKING: @@ -383,7 +382,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., - engine: JSONEngine | None = ..., + engine: JSONEngine = ..., lines: bool = ..., chunksize: int, compression: CompressionOptions = ..., @@ -408,7 +407,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., - engine: JSONEngine | None = ..., + engine: JSONEngine = ..., lines: bool = ..., chunksize: int, compression: CompressionOptions = ..., @@ -433,7 +432,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., - engine: JSONEngine | None = ..., + engine: JSONEngine = ..., lines: bool = ..., chunksize: None = ..., compression: CompressionOptions = ..., @@ -457,7 +456,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., - engine: JSONEngine | None = None, + engine: JSONEngine = ..., lines: bool = ..., chunksize: None = ..., compression: CompressionOptions = ..., @@ -486,7 +485,7 @@ def read_json( date_unit: str | None = None, encoding: str | None = None, encoding_errors: str | None = "strict", - engine: JSONEngine | None = None, + engine: JSONEngine = "ujson", lines: bool = False, chunksize: int | None = None, compression: CompressionOptions = "infer", @@ -615,7 +614,7 @@ def read_json( .. versionadded:: 1.3.0 - engine : {{'ujson', 'pyarrow'}} + engine : {{'ujson', 'pyarrow'}}, default "ujson" Parser engine to use. lines : bool, default False @@ -792,13 +791,13 @@ def __init__( precise_float: bool, date_unit, encoding, - engine, lines: bool, chunksize: int | None, compression: CompressionOptions, nrows: int | None, storage_options: StorageOptions = None, encoding_errors: str | None = "strict", + engine: JSONEngine = "ujson", ) -> None: self.orient = orient @@ -829,33 +828,45 @@ def __init__( self.nrows = validate_integer("nrows", self.nrows, 0) if not self.lines: raise ValueError("nrows can only be passed if lines=True") + if self.engine == "pyarrow": + if not self.lines: + raise ValueError( + "currently pyarrow engine only supports " + "the line-delimited JSON format" + ) - if engine is not None: + if self.engine == "pyarrow": self._engine = self._make_engine(filepath_or_buffer, self.engine) - else: + if self.engine == "ujson": data = self._get_data_from_filepath(filepath_or_buffer) self.data = self._preprocess_data(data) def _make_engine( self, filepath_or_buffer: FilePath | ReadBuffer[str] | ReadBuffer[bytes], - engine: JSONEngine, - ) -> ParserBase: - - mapping: dict[str, type[ParserBase]] = { - "pyarrow": ArrowParserWrapper, - "ujson": ..., - } - - if engine not in mapping: - raise ValueError( - f"Unknown engine: {engine} (valid options are {mapping.keys()})" - ) + engine: JSONEngine = "pyarrow", + ) -> ArrowJsonParserWrapper: if not isinstance(filepath_or_buffer, list): - ... + is_text = False + mode = "rb" + self.handles = get_handle( + filepath_or_buffer, + mode=mode, + encoding=self.encoding, + is_text=is_text, + compression=self.compression, + storage_options=self.storage_options, + errors=self.encoding_errors, + ) + filepath_or_buffer = self.handles.handle - return mapping[engine](filepath_or_buffer) + try: + return ArrowJsonParserWrapper(filepath_or_buffer) + except Exception: + if self.handles is not None: + self.handles.close() + raise def _preprocess_data(self, data): """ @@ -939,20 +950,23 @@ def read(self) -> DataFrame | Series: Read the whole JSON input into a pandas object. """ obj: DataFrame | Series - if self.lines: - if self.chunksize: - obj = concat(self) - elif self.nrows: - lines = list(islice(self.data, self.nrows)) - lines_json = self._combine_lines(lines) - obj = self._get_object_parser(lines_json) + if self.engine == "pyarrow": + obj = self._engine.read() + if self.engine == "ujson": + if self.lines: + if self.chunksize: + obj = concat(self) + elif self.nrows: + lines = list(islice(self.data, self.nrows)) + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + else: + data = ensure_str(self.data) + data_lines = data.split("\n") + obj = self._get_object_parser(self._combine_lines(data_lines)) else: - data = ensure_str(self.data) - data_lines = data.split("\n") - obj = self._get_object_parser(self._combine_lines(data_lines)) - else: - obj = self._get_object_parser(self.data) - self.close() + obj = self._get_object_parser(self.data) + self.close() return obj def _get_object_parser(self, json) -> DataFrame | Series: diff --git a/pandas/io/json/arrow_json_parser_wrapper.py b/pandas/io/json/arrow_json_parser_wrapper.py new file mode 100644 index 0000000000000..bf341b7c7506b --- /dev/null +++ b/pandas/io/json/arrow_json_parser_wrapper.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas._typing import ReadBuffer +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.inference import is_integer + +if TYPE_CHECKING: + from pandas import DataFrame + + +class ArrowJsonParserWrapper: + """ + Wrapper for the pyarrow engine for read_json() + """ + + def __init__(self, src: ReadBuffer[bytes]) -> None: + super().__init__() + self.src = src + + def _parse_kwd(self) -> None: + """ + Validates keywords before passing to pyarrow + """ + ... + + def _get_pyarrow_options(self) -> None: + ... + + def read(self) -> DataFrame: + """ + Reads the contents of a JSON file into a DataFrame and + processes it according to the kwargs passed in the + constructor. + + Returns + ------- + DataFrame + The DataFrame created from the JSON file. + """ + pyarrow_json = import_optional_dependency("pyarrow.json") + table = pyarrow_json.read_json(self.src) + + frame = table.to_pandas() + return frame + + def _finalize_output(self, frame: DataFrame) -> DataFrame: + """ + Processes data read in based on kwargs. + + Parameters + ---------- + frame: DataFrame + The DataFrame to process. + + Returns + ------- + DataFrame + The processed DataFrame. + """ + num_cols = len(frame.columns) + multi_index_named = True + if self.header is None: + if self.names is None: + if self.prefix is not None: + self.names = [f"{self.prefix}{i}" for i in range(num_cols)] + elif self.header is None: + self.names = range(num_cols) + if len(self.names) != num_cols: + # usecols is passed through to pyarrow, we only handle index col here + # The only way self.names is not the same length as number of cols is + # if we have int index_col. We should just pad the names(they will get + # removed anyways) to expected length then. + self.names = list(range(num_cols - len(self.names))) + self.names + multi_index_named = False + frame.columns = self.names + # we only need the frame not the names + frame.columns, frame = self._do_date_conversions(frame.columns, frame) + if self.index_col is not None: + for i, item in enumerate(self.index_col): + if is_integer(item): + self.index_col[i] = frame.columns[item] + else: + # String case + if item not in frame.columns: + raise ValueError(f"Index {item} invalid") + frame.set_index(self.index_col, drop=True, inplace=True) + # Clear names if headerless and no name given + if self.header is None and not multi_index_named: + frame.index.names = [None] * len(frame.index.names) + + if self.kwds.get("dtype") is not None: + try: + frame = frame.astype(self.kwds.get("dtype")) + except TypeError as e: + # GH#44901 reraise to keep api consistent + raise ValueError(e) + return frame diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 4e848cd48b42d..6085ced7cd547 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -7,3 +7,11 @@ def orient(request): Fixture for orients excluding the table format. """ return request.param + + +@pytest.fixture +def json_dir_path(datapath): + """ + The directory path to the data files needed for parser tests. + """ + return datapath("io", "json", "data") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index b371990178d28..d9c196a1577d2 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -1,4 +1,5 @@ from io import StringIO +import os from pathlib import Path from typing import Iterator @@ -27,6 +28,37 @@ def test_read_jsonl(): tm.assert_frame_equal(result, expected) +def test_read_jsonl_engine_pyarrow(json_dir_path): + # '{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n{"a": 5, "b": 6}' + + result = read_json( + os.path.join(json_dir_path, "line_delimited.json"), + lines=True, + engine="pyarrow", + ) + expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail +def test_read_jsonl_engine_pyarrow_lines_false(json_dir_path): + result = read_json( + os.path.join(json_dir_path, "line_delimited.json"), + engine="pyarrow", + ) + expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail +def test_read_jsonl_engine_pyarrow_json_string(): + result = read_json( + '{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}', engine="pyarrow" + ) + expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + def test_read_datetime(): # GH33787 df = DataFrame(