diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index e1415e2ca23ca..9f3077e266e98 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -896,9 +896,9 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ :class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin" :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter" :class:`~pandas.tseries.offsets.YearEnd`, ``'Y'``, "calendar year end" - :class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin" - :class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end" - :class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin" + :class:`~pandas.tseries.offsets.YearBegin`, ``'YS'`` or ``'BYS'``,"calendar year begin" + :class:`~pandas.tseries.offsets.BYearEnd`, ``'BY'``, "business year end" + :class:`~pandas.tseries.offsets.BYearBegin`, ``'BYS'``, "business year begin" :class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year" :class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday" :class:`~pandas.tseries.offsets.BusinessHour`, ``'bh'``, "business hour" @@ -1259,9 +1259,9 @@ frequencies. We will refer to these aliases as *offset aliases*. "QS", "quarter start frequency" "BQS", "business quarter start frequency" "Y", "year end frequency" - "BA, BY", "business year end frequency" - "AS, YS", "year start frequency" - "BAS, BYS", "business year start frequency" + "BY", "business year end frequency" + "YS", "year start frequency" + "BYS", "business year start frequency" "h", "hourly frequency" "bh", "business hour frequency" "cbh", "custom business hour frequency" @@ -1692,7 +1692,7 @@ the end of the interval. .. 
warning:: The default values for ``label`` and ``closed`` is '**left**' for all - frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BA', 'BQ', and 'W' + frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W' which all have a default of 'right'. This might unintendedly lead to looking ahead, where the value for a later diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index ae70eb078f6d9..09bf5428d0432 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -886,11 +886,23 @@ This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622 This is *unchanged* from prior versions, but shown for illustration purposes: -.. ipython:: python +.. code-block:: python - df = pd.DataFrame(np.arange(6), columns=['value'], - index=pd.MultiIndex.from_product([list('BA'), range(3)])) - df + In [81]: df = pd.DataFrame(np.arange(6), columns=['value'], + ....: index=pd.MultiIndex.from_product([list('BA'), range(3)])) + ....: + In [82]: df + + Out[82]: + value + B 0 0 + 1 1 + 2 2 + A 0 3 + 1 4 + 2 5 + + [6 rows x 1 columns] .. 
code-block:: python diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 29a2d5c0b5877..6861c1fc1068a 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -348,6 +348,7 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) +- Bug in :meth:`DataFrame.to_json` OverflowError with np.long* dtypes (:issue:`55403`) - Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) Period @@ -394,6 +395,7 @@ Other ^^^^^ - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) +- Bug in rendering ``inf`` values inside a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`) - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`) - diff --git a/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h index 54bcca9e4136c..649cc2a2692d0 100644 --- a/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h +++ b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h @@ -138,19 +138,20 @@ typedef int64_t JSLONG; #endif enum JSTYPES { - JT_NULL, // NULL - JT_TRUE, // boolean true - JT_FALSE, // boolean false - JT_INT, // (JSINT32 (signed 32-bit)) - JT_LONG, // (JSINT64 (signed 64-bit)) - JT_DOUBLE, // (double) - JT_BIGNUM, // integer larger than sys.maxsize - JT_UTF8, // (char 8-bit) - 
JT_ARRAY, // Array structure - JT_OBJECT, // Key/Value structure - JT_INVALID, // Internal, do not return nor expect - JT_POS_INF, // Positive infinity - JT_NEG_INF, // Negative infinity + JT_NULL, // NULL + JT_TRUE, // boolean true + JT_FALSE, // boolean false + JT_INT, // (JSINT32 (signed 32-bit)) + JT_LONG, // (JSINT64 (signed 64-bit)) + JT_DOUBLE, // (double) + JT_BIGNUM, // integer larger than sys.maxsize + JT_UTF8, // (char 8-bit) + JT_ARRAY, // Array structure + JT_OBJECT, // Key/Value structure + JT_INVALID, // Internal, do not return nor expect + JT_POS_INF, // Positive infinity + JT_NEG_INF, // Negative infinity + JT_LONG_DOUBLE // Long Double }; typedef void * JSOBJ; @@ -181,7 +182,7 @@ typedef struct __JSONObjectEncoder { size_t *_outLen); JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); - double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + long double (*getLongDoubleValue)(JSOBJ obj, JSONTypeContext *tc); const char *(*getBigNumStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi index d5c9f1342a089..282dcee3ed6cf 100644 --- a/pandas/_libs/missing.pyi +++ b/pandas/_libs/missing.pyi @@ -14,4 +14,3 @@ def isneginf_scalar(val: object) -> bool: ... def checknull(val: object, inf_as_na: bool = ...) -> bool: ... def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... -def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... 
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index e3e7d8daa03e1..8ef59b46ca25f 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -255,31 +255,6 @@ cdef bint checknull_with_nat_and_na(object obj): return checknull_with_nat(obj) or obj is C_NA -@cython.wraparound(False) -@cython.boundscheck(False) -def is_float_nan(values: ndarray) -> ndarray: - """ - True for elements which correspond to a float nan - - Returns - ------- - ndarray[bool] - """ - cdef: - ndarray[uint8_t] result - Py_ssize_t i, N - object val - - N = len(values) - result = np.zeros(N, dtype=np.uint8) - - for i in range(N): - val = values[i] - if util.is_nan(val): - result[i] = True - return result.view(bool) - - @cython.wraparound(False) @cython.boundscheck(False) def is_numeric_na(values: ndarray) -> ndarray: diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c index 942bd0b518144..4b1273e1e254c 100644 --- a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c @@ -74,7 +74,7 @@ The extra 2 bytes are for the quotes around the string */ #define RESERVE_STRING(_len) (2 + ((_len)*6)) -static const double g_pow10[] = {1, +static const long double g_pow10[] = {1, 10, 100, 1000, @@ -784,29 +784,29 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { enc->offset += (wstr - (enc->offset)); } -int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, - double value) { +int Buffer_AppendLongDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, + long double value) { /* if input is beyond the thresholds, revert to exponential */ - const double thres_max = (double)1e16 - 1; - const double thres_min = (double)1e-15; + const long double thres_max = (long double)1e16 - 1; + const long double thres_min = (long double)1e-15; char precision_str[20]; int count; - double diff = 0.0; + long double diff = 0.0; char *str = enc->offset; 
char *wstr = str; unsigned long long whole; - double tmp; + long double tmp; unsigned long long frac; int neg; - double pow10; + long double pow10; if (value == HUGE_VAL || value == -HUGE_VAL) { - SetError(obj, enc, "Invalid Inf value when encoding double"); + SetError(obj, enc, "Invalid Inf value when encoding long double"); return FALSE; } if (!(value == value)) { - SetError(obj, enc, "Invalid Nan value when encoding double"); + SetError(obj, enc, "Invalid Nan value when encoding long double"); return FALSE; } @@ -942,7 +942,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, This reservation must hold length of _name as encoded worst case + - maxLength of double to string OR maxLength of JSLONG to string + maxLength of long double to string OR maxLength of JSLONG to string */ Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); @@ -1076,9 +1076,9 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, break; } - case JT_DOUBLE: { - if (!Buffer_AppendDoubleUnchecked(obj, enc, - enc->getDoubleValue(obj, &tc))) { + case JT_LONG_DOUBLE: { + if (!Buffer_AppendLongDoubleUnchecked(obj, enc, + enc->getLongDoubleValue(obj, &tc))) { enc->endTypeContext(obj, &tc); enc->level--; return; diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8c55505f61b51..48c1a45507390 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -105,7 +105,7 @@ typedef struct __TypeContext { PyObject *attrList; PyObject *iterator; - double doubleValue; + long double longDoubleValue; JSINT64 longValue; char *cStr; @@ -164,7 +164,7 @@ static TypeContext *createTypeContext(void) { pc->index = 0; pc->size = 0; pc->longValue = 0; - pc->doubleValue = 0.0; + pc->longDoubleValue = (long double) 0.0; pc->cStr = NULL; pc->npyarr = NULL; pc->pdblock = NULL; @@ -1494,8 +1494,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { if 
(npy_isnan(val) || npy_isinf(val)) { tc->type = JT_NULL; } else { - pc->doubleValue = val; - tc->type = JT_DOUBLE; + pc->longDoubleValue = (long double) val; + tc->type = JT_LONG_DOUBLE; } return; } else if (PyBytes_Check(obj)) { @@ -1507,8 +1507,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { tc->type = JT_UTF8; return; } else if (object_is_decimal_type(obj)) { - pc->doubleValue = PyFloat_AsDouble(obj); - tc->type = JT_DOUBLE; + pc->longDoubleValue = (long double) PyFloat_AsDouble(obj); + tc->type = JT_LONG_DOUBLE; return; } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { if (object_is_nat_type(obj)) { @@ -1605,10 +1605,16 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { PyArray_DescrFromType(NPY_BOOL)); tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE; return; - } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { - PyArray_CastScalarToCtype(obj, &(pc->doubleValue), + } else if (PyArray_IsScalar(obj, Float) || + PyArray_IsScalar(obj, Double)) { + PyArray_CastScalarToCtype(obj, &(pc->longDoubleValue), PyArray_DescrFromType(NPY_DOUBLE)); - tc->type = JT_DOUBLE; + tc->type = JT_LONG_DOUBLE; + return; + } else if (PyArray_IsScalar(obj, LongDouble)) { + PyArray_CastScalarToCtype(obj, &(pc->longDoubleValue), + PyArray_DescrFromType(NPY_LONGDOUBLE)); + tc->type = JT_LONG_DOUBLE; return; } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { PyErr_Format(PyExc_TypeError, @@ -1925,8 +1931,8 @@ JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { return GET_TC(tc)->longValue; } -double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->doubleValue; +long double Object_getLongDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->longDoubleValue; } const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, @@ -1970,7 +1976,6 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, if (PyDateTimeAPI == 
NULL) { return NULL; } - PandasDateTime_IMPORT; if (PandasDateTimeAPI == NULL) { return NULL; @@ -2006,7 +2011,7 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, Object_getStringValue, Object_getLongValue, NULL, // getIntValue is unused - Object_getDoubleValue, + Object_getLongDoubleValue, Object_getBigNumStringValue, Object_iterBegin, Object_iterNext, diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 86f620beeec3b..26181d8f15518 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -192,9 +192,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict = { "BQS": "Q", "QS": "Q", "BQ": "Q", - "BA": "Y", - "AS": "Y", - "BAS": "Y", "MS": "M", "D": "D", "B": "B", @@ -205,9 +202,9 @@ OFFSET_TO_PERIOD_FREQSTR: dict = { "ns": "ns", "h": "h", "Q": "Q", - "Y": "Y", "W": "W", "ME": "M", + "Y": "Y", "BY": "Y", "YS": "Y", "BYS": "Y", @@ -244,6 +241,45 @@ DEPR_ABBREVS: dict[str, str]= { "A-SEP": "Y-SEP", "A-OCT": "Y-OCT", "A-NOV": "Y-NOV", + "BA": "BY", + "BA-DEC": "BY-DEC", + "BA-JAN": "BY-JAN", + "BA-FEB": "BY-FEB", + "BA-MAR": "BY-MAR", + "BA-APR": "BY-APR", + "BA-MAY": "BY-MAY", + "BA-JUN": "BY-JUN", + "BA-JUL": "BY-JUL", + "BA-AUG": "BY-AUG", + "BA-SEP": "BY-SEP", + "BA-OCT": "BY-OCT", + "BA-NOV": "BY-NOV", + "AS": "YS", + "AS-DEC": "YS-DEC", + "AS-JAN": "YS-JAN", + "AS-FEB": "YS-FEB", + "AS-MAR": "YS-MAR", + "AS-APR": "YS-APR", + "AS-MAY": "YS-MAY", + "AS-JUN": "YS-JUN", + "AS-JUL": "YS-JUL", + "AS-AUG": "YS-AUG", + "AS-SEP": "YS-SEP", + "AS-OCT": "YS-OCT", + "AS-NOV": "YS-NOV", + "BAS": "BYS", + "BAS-DEC": "BYS-DEC", + "BAS-JAN": "BYS-JAN", + "BAS-FEB": "BYS-FEB", + "BAS-MAR": "BYS-MAR", + "BAS-APR": "BYS-APR", + "BAS-MAY": "BYS-MAY", + "BAS-JUN": "BYS-JUN", + "BAS-JUL": "BYS-JUL", + "BAS-AUG": "BYS-AUG", + "BAS-SEP": "BYS-SEP", + "BAS-OCT": "BYS-OCT", + "BAS-NOV": "BYS-NOV", "H": "h", "BH": "bh", "CBH": "cbh", diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 
ad37add17967d..a726c735bf9a1 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -253,8 +253,8 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - if (freqstr[0:2] in ["MS", "QS", "AS"]) or ( - freqstr[1:3] in ["MS", "QS", "AS"]): + if (freqstr[0:2] in ["MS", "QS", "YS"]) or ( + freqstr[1:3] in ["MS", "QS", "YS"]): end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 042d5dafe3046..6a6f30de8dade 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2414,7 +2414,7 @@ cdef class BYearEnd(YearOffset): _outputName = "BusinessYearEnd" _default_month = 12 - _prefix = "BA" + _prefix = "BY" _day_opt = "business_end" @@ -2453,7 +2453,7 @@ cdef class BYearBegin(YearOffset): _outputName = "BusinessYearBegin" _default_month = 1 - _prefix = "BAS" + _prefix = "BYS" _day_opt = "business_start" @@ -2552,7 +2552,7 @@ cdef class YearBegin(YearOffset): """ _default_month = 1 - _prefix = "AS" + _prefix = "YS" _day_opt = "start" @@ -4540,10 +4540,10 @@ CDay = CustomBusinessDay prefix_mapping = { offset._prefix: offset for offset in [ - YearBegin, # 'AS' + YearBegin, # 'YS' YearEnd, # 'Y' - BYearBegin, # 'BAS' - BYearEnd, # 'BA' + BYearBegin, # 'BYS' + BYearEnd, # 'BY' BusinessDay, # 'B' BusinessMonthBegin, # 'BMS' BusinessMonthEnd, # 'BM' @@ -4584,12 +4584,9 @@ _lite_rule_alias = { "Q": "Q-DEC", "Y": "Y-DEC", # YearEnd(month=12), - "AS": "AS-JAN", # YearBegin(month=1), - "YS": "AS-JAN", - "BA": "BA-DEC", # BYearEnd(month=12), - "BY": "BA-DEC", - "BAS": "BAS-JAN", # BYearBegin(month=1), - "BYS": "BAS-JAN", + "YS": "YS-JAN", # YearBegin(month=1), + "BY": "BY-DEC", # BYearEnd(month=12), + "BYS": "BYS-JAN", # BYearBegin(month=1), "Min": "min", "min": "min", diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 
60c42c01e9f6f..c91f892936640 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2526,7 +2526,7 @@ def _round_temporally( raise ValueError(f"Must specify a valid frequency: {freq}") pa_supported_unit = { "Y": "year", - "AS": "year", + "YS": "year", "Q": "quarter", "QS": "quarter", "M": "month", diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 31c143ee012bb..e7b7ecba60e0b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1663,7 +1663,14 @@ def __repr__(self) -> str: self, self._formatter(), indent_for_name=False ).rstrip(", \n") class_name = f"<{type(self).__name__}>\n" - return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + footer = self._get_repr_footer() + return f"{class_name}{data}\n{footer}" + + def _get_repr_footer(self) -> str: + # GH#24278 + if self.ndim > 1: + return f"Shape: {self.shape}, dtype: {self.dtype}" + return f"Length: {len(self)}, dtype: {self.dtype}" def _repr_2d(self) -> str: from pandas.io.formats.printing import format_object_summary @@ -1679,7 +1686,8 @@ def _repr_2d(self) -> str: ] data = ",\n".join(lines) class_name = f"<{type(self).__name__}>" - return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" + footer = self._get_repr_footer() + return f"{class_name}\n[\n{data}\n]\n{footer}" def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5059f5d000ccd..e19635af6ab0b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2177,7 +2177,7 @@ def _repr_categories(self) -> list[str]: category_strs = [x.strip() for x in category_strs] return category_strs - def _repr_categories_info(self) -> str: + def _get_repr_footer(self) -> str: """ Returns a string representation of the footer. """ @@ -2229,7 +2229,7 @@ def __repr__(self) -> str: """ String representation. 
""" - footer = self._repr_categories_info() + footer = self._get_repr_footer() length = len(self) max_len = 10 if length > max_len: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c3cab965041e0..d749235e2cd2c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -37,7 +37,6 @@ is_datetime_array, no_default, ) -from pandas._libs.missing import is_float_nan from pandas._libs.tslibs import ( IncompatibleFrequency, OutOfBoundsDatetime, @@ -1390,16 +1389,8 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): values = np.asarray(values) - values = lib.maybe_convert_objects(values, safe=True) - - result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] - - # could have nans - mask = is_float_nan(values) - if mask.any(): - result_arr = np.array(result) - result_arr[mask] = na_rep - result = result_arr.tolist() + # TODO: why do we need different justify for these cases? 
+ result = trim_front(format_array(values, None, justify="all")) else: result = trim_front(format_array(values, None, justify="left")) return header + result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f3b2a35f379f4..12f93cf482a1d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -992,11 +992,11 @@ def date_range( **Specify a unit** - >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s") + >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s") DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01', '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01', '2817-01-01', '2917-01-01'], - dtype='datetime64[s]', freq='100AS-JAN') + dtype='datetime64[s]', freq='100YS-JAN') """ if freq is None and com.any_none(periods, start, end): freq = "D" diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 59e6a20915c18..8b3071a6f8582 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2101,7 +2101,7 @@ def __init__( else: freq = to_offset(freq) - end_types = {"ME", "Y", "Q", "BM", "BA", "BQ", "W"} + end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"} rule = freq.rule_code if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): if closed is None: @@ -2299,7 +2299,7 @@ def _adjust_bin_edges( if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in ( "BQ", - "BA", + "BY", "Q", "Y", "W", diff --git a/pandas/core/series.py b/pandas/core/series.py index c2eea371ddef3..fdd03debf6de4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5729,7 +5729,7 @@ def to_timestamp( 2023-01-01 1 2024-01-01 2 2025-01-01 3 - Freq: AS-JAN, dtype: int64 + Freq: YS-JAN, dtype: int64 Using `freq` which is the offset that the Timestamps will have diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index cac83e2a48972..bb976b3a0208e 100644 --- a/pandas/io/formats/format.py 
+++ b/pandas/io/formats/format.py @@ -259,11 +259,12 @@ def _get_footer(self) -> str: name = self.series.name footer = "" - if getattr(self.series.index, "freq", None) is not None: - assert isinstance( - self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex) - ) - footer += f"Freq: {self.series.index.freqstr}" + index = self.series.index + if ( + isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)) + and index.freq is not None + ): + footer += f"Freq: {index.freqstr}" if self.name is not False and name is not None: if footer: @@ -289,7 +290,7 @@ def _get_footer(self) -> str: # level infos are added to the end and in a new line, like it is done # for Categoricals if isinstance(self.tr_series.dtype, CategoricalDtype): - level_info = self.tr_series._values._repr_categories_info() + level_info = self.tr_series._values._get_repr_footer() if footer: footer += "\n" footer += level_info @@ -1215,18 +1216,16 @@ def _format_strings(self) -> list[str]: def _format(x): if self.na_rep is not None and is_scalar(x) and isna(x): - try: - # try block for np.isnat specifically - # determine na_rep if x is None or NaT-like - if x is None: - return "None" - elif x is NA: - return str(NA) - elif x is NaT or np.isnat(x): - return "NaT" - except (TypeError, ValueError): - # np.isnat only handles datetime or timedelta objects - pass + if x is None: + return "None" + elif x is NA: + return str(NA) + elif lib.is_float(x) and np.isinf(x): + # TODO(3.0): this will be unreachable when use_inf_as_na + # deprecation is enforced + return str(x) + elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)): + return "NaT" return self.na_rep elif isinstance(x, PandasObject): return str(x) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index df6ccda27ab85..693b8d9483407 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1594,7 +1594,7 @@ def 
test_dt64arr_add_sub_offset_array( Timestamp("2016-04-01"), Timestamp("2017-04-01"), ], - "AS-APR", + "YS-APR", ), ( "__sub__", @@ -1616,7 +1616,7 @@ def test_dt64arr_add_sub_offset_array( Timestamp("2015-10-01"), Timestamp("2016-10-01"), ], - "AS-OCT", + "YS-OCT", ), ], ) @@ -1625,7 +1625,7 @@ def test_dti_add_sub_nonzero_mth_offset( ): # GH 26258 tz = tz_aware_fixture - date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="AS", tz=tz) + date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="YS", tz=tz) date = tm.box_expected(date, box_with_array, False) mth = getattr(date, op) result = mth(offset) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index aeb65d98d8ab2..859dc56de4a25 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -44,7 +44,7 @@ def test_to_timestamp(self, frame_or_series): if frame_or_series is Series: assert result.name == "A" - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + exp_index = date_range("1/1/2001", end="1/1/2009", freq="YS-JAN") result = obj.to_timestamp("D", "start") tm.assert_index_equal(result.index, exp_index) @@ -88,7 +88,7 @@ def test_to_timestamp_columns(self): tm.assert_index_equal(result.columns, exp_index) tm.assert_numpy_array_equal(result.values, df.values) - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + exp_index = date_range("1/1/2001", end="1/1/2009", freq="YS-JAN") result = df.to_timestamp("D", "start", axis=1) tm.assert_index_equal(result.columns, exp_index) @@ -112,14 +112,14 @@ def test_to_timestamp_columns(self): result1 = df.to_timestamp("5min", axis=1) result2 = df.to_timestamp("min", axis=1) - expected = date_range("2001-01-01", "2009-01-01", freq="AS") + expected = date_range("2001-01-01", "2009-01-01", freq="YS") assert isinstance(result1.columns, DatetimeIndex) assert isinstance(result2.columns, DatetimeIndex) 
tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) # PeriodIndex.to_timestamp always use 'infer' - assert result1.columns.freqstr == "AS-JAN" - assert result2.columns.freqstr == "AS-JAN" + assert result1.columns.freqstr == "YS-JAN" + assert result2.columns.freqstr == "YS-JAN" def test_to_timestamp_invalid_axis(self): index = period_range(freq="Y", start="1/1/2001", end="12/1/2009") diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 0634b8268c04c..63ecdfa5e001b 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -411,7 +411,7 @@ def test_to_records_with_na_record(self): def test_to_records_with_inf_as_na_record(self): # GH 48526 expected = """ NaN inf record -0 NaN b [0, inf, b] +0 inf b [0, inf, b] 1 NaN NaN [1, nan, nan] 2 e f [2, e, f]""" msg = "use_inf_as_na option is deprecated" diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 88ee8a35e5c94..76a543050097d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -734,7 +734,7 @@ def test_list_grouper_with_nat(self): # GH 14715 df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")}) df.iloc[-1] = pd.NaT - grouper = Grouper(key="date", freq="AS") + grouper = Grouper(key="date", freq="YS") # Grouper in a list grouping result = df.groupby([grouper]) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py index 8900c5cdbca14..6839fafcdc114 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -56,7 +56,7 @@ def test_to_period_quarterlyish(self, off): prng = rng.to_period() assert prng.freq == "Q-DEC" - @pytest.mark.parametrize("off", ["BA", "AS", "BAS"]) + @pytest.mark.parametrize("off", ["BY", 
"YS", "BYS"]) def test_to_period_annualish(self, off): rng = date_range("01-Jan-2012", periods=8, freq=off) prng = rng.to_period() diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 6da215715482d..077b4fa5a0696 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -648,7 +648,7 @@ def test_constructor_coverage(self): with pytest.raises(ValueError, match=msg): date_range(periods=10, freq="D") - @pytest.mark.parametrize("freq", ["AS", "W-SUN"]) + @pytest.mark.parametrize("freq", ["YS", "W-SUN"]) def test_constructor_datetime64_tzformat(self, freq): # see GH#6572: ISO 8601 format results in stdlib timezone object idx = date_range( @@ -981,8 +981,8 @@ def test_dti_constructor_years_only(self, tz_naive_fixture): rng3 = date_range("2014", "2020", freq="Y", tz=tz) expected3 = date_range("2014-12-31", "2019-12-31", freq="Y", tz=tz) - rng4 = date_range("2014", "2020", freq="AS", tz=tz) - expected4 = date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz) + rng4 = date_range("2014", "2020", freq="YS", tz=tz) + expected4 = date_range("2014-01-01", "2020-01-01", freq="YS", tz=tz) for rng, expected in [ (rng1, expected1), diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ededf78621699..a74d31747fbb0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -243,13 +243,12 @@ def test_date_range_gen_error(self): rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min") assert len(rng) == 4 - @pytest.mark.parametrize("freq", ["AS", "YS"]) - def test_begin_year_alias(self, freq): + def test_begin_year_alias(self): # see gh-9313 - rng = date_range("1/1/2013", "7/1/2017", freq=freq) + rng = date_range("1/1/2013", "7/1/2017", freq="YS") exp = DatetimeIndex( ["2013-01-01", "2014-01-01", "2015-01-01", 
"2016-01-01", "2017-01-01"], - freq=freq, + freq="YS", ) tm.assert_index_equal(rng, exp) @@ -261,12 +260,11 @@ def test_end_year_alias(self): ) tm.assert_index_equal(rng, exp) - @pytest.mark.parametrize("freq", ["BA", "BY"]) - def test_business_end_year_alias(self, freq): + def test_business_end_year_alias(self): # see gh-9313 - rng = date_range("1/1/2013", "7/1/2017", freq=freq) + rng = date_range("1/1/2013", "7/1/2017", freq="BY") exp = DatetimeIndex( - ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq="BY" ) tm.assert_index_equal(rng, exp) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 156075e3fafec..a18501a193b60 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,5 +1,6 @@ import datetime as dt from datetime import date +import re import dateutil import numpy as np @@ -226,3 +227,40 @@ def test_CBH_deprecated(self): ) tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "freq_depr, expected_values, expected_freq", + [ + ( + "2BA", + ["2020-12-31", "2022-12-30"], + "2BY-DEC", + ), + ( + "AS-AUG", + ["2021-08-01", "2022-08-01", "2023-08-01"], + "YS-AUG", + ), + ( + "1BAS-MAY", + ["2021-05-03", "2022-05-02", "2023-05-01"], + "1BYS-MAY", + ), + ], + ) + def test_AS_BA_BAS_deprecated(self, freq_depr, expected_values, expected_freq): + # GH#55479 + freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1] + msg = f"'{freq_msg}' is deprecated and will be removed in a future version." 
+ + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = date_range( + dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr + ) + result = DatetimeIndex( + expected_values, + dtype="datetime64[ns]", + freq=expected_freq, + ) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 185134af165f4..0a5287d154adc 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -146,7 +146,7 @@ def test_datetimeindex_accessors5(self): qsfeb = to_offset("QS-FEB") bq = to_offset("BQ") bqs_apr = to_offset("BQS-APR") - as_nov = to_offset("AS-NOV") + as_nov = to_offset("YS-NOV") tests = [ (freq_m.is_month_start(Timestamp("2013-06-01")), 1), diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py index 2394efb353ab6..977ad8b26a369 100644 --- a/pandas/tests/indexes/period/methods/test_to_timestamp.py +++ b/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -49,7 +49,7 @@ def test_to_timestamp_non_contiguous(self): def test_to_timestamp_freq(self): idx = period_range("2017", periods=12, freq="Y-DEC") result = idx.to_timestamp() - expected = date_range("2017", periods=12, freq="AS-JAN") + expected = date_range("2017", periods=12, freq="YS-JAN") tm.assert_index_equal(result, expected) def test_to_timestamp_pi_nat(self): diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index d5f8c5200c4a3..edf7cb64c0913 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -762,6 +762,14 @@ def test_float_array(self, float_numpy_dtype): ) tm.assert_almost_equal(float_input, float_output) + def test_array_long_double(self): + dtype = np.longdouble + arr = np.arange(100.202, 200.202, 1, dtype=dtype) + arr = arr.reshape((5, 5, 4)) + + arr_out = 
np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)), dtype=dtype) + tm.assert_almost_equal(arr, arr_out) + def test_float_max(self, float_numpy_dtype): klass = np.dtype(float_numpy_dtype).type num = klass(np.finfo(float_numpy_dtype).max / 10) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 28d02576156a0..f66f5bf50974e 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1172,7 +1172,7 @@ def test_resample_anchored_intraday(simple_date_range_series, unit): assert len(resampled) == 1 -@pytest.mark.parametrize("freq", ["MS", "BMS", "QS-MAR", "AS-DEC", "AS-JUN"]) +@pytest.mark.parametrize("freq", ["MS", "BMS", "QS-MAR", "YS-DEC", "YS-JUN"]) def test_resample_anchored_monthstart(simple_date_range_series, freq, unit): ts = simple_date_range_series("1/1/2000", "12/31/2002") ts.index = ts.index.as_unit(unit) @@ -1320,7 +1320,7 @@ def test_resample_unequal_times(unit): df = DataFrame({"close": 1}, index=bad_ind) # it works! 
- df.resample("AS").sum() + df.resample("YS").sum() def test_resample_consistency(unit): diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index d214e1b4ae4ae..6ad09f12525b4 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -660,7 +660,7 @@ def test_default_right_closed_label(self, from_freq, to_freq): @pytest.mark.parametrize( "from_freq, to_freq", - [("D", "MS"), ("Q", "AS"), ("ME", "QS"), ("h", "D"), ("min", "h")], + [("D", "MS"), ("Q", "YS"), ("ME", "QS"), ("h", "D"), ("min", "h")], ) def test_default_left_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 51d0dd298f841..22ff7f8405a40 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -52,7 +52,7 @@ def base_delta_code_pair(request): freqs = ( [f"Q-{month}" for month in MONTHS] - + [f"{annual}-{month}" for annual in ["Y", "BA"] for month in MONTHS] + + [f"{annual}-{month}" for annual in ["Y", "BY"] for month in MONTHS] + ["ME", "BM", "BMS"] + [f"WOM-{count}{day}" for count in range(1, 5) for day in DAYS] + [f"W-{day}" for day in DAYS] @@ -215,7 +215,7 @@ def test_infer_freq_index(freq, expected): "expected,dates", list( { - "AS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"], + "YS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"], "Q-OCT": ["2009-01-31", "2009-04-30", "2009-07-31", "2009-10-31"], "ME": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"], "W-SAT": ["2010-12-25", "2011-01-01", "2011-01-08", "2011-01-15"], diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 7f96ea98fa047..9389f78c9e672 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ 
b/pandas/tests/tseries/offsets/test_offsets.py @@ -839,7 +839,7 @@ def test_rule_code(self): "NOV", "DEC", ] - base_lst = ["Y", "AS", "BA", "BAS", "Q", "QS", "BQ", "BQS"] + base_lst = ["Y", "YS", "BY", "BYS", "Q", "QS", "BQ", "BQS"] for base in base_lst: for v in suffix_lst: alias = "-".join([base, v]) @@ -858,7 +858,7 @@ def test_freq_offsets(): class TestReprNames: def test_str_for_named_is_name(self): # look at all the amazing combinations! - month_prefixes = ["Y", "AS", "BA", "BAS", "Q", "BQ", "BQS", "QS"] + month_prefixes = ["Y", "YS", "BY", "BYS", "Q", "BQ", "BQS", "QS"] names = [ prefix + "-" + month for prefix in month_prefixes diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0ed0fe4b87576..db4fdf0d24465 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -59,7 +59,7 @@ # -------------------------------------------------------------------- # Offset related functions -_need_suffix = ["QS", "BQ", "BQS", "YS", "AS", "BY", "BA", "BYS", "BAS"] +_need_suffix = ["QS", "BQ", "BQS", "YS", "BY", "BYS"] for _prefix in _need_suffix: for _m in MONTHS: @@ -345,7 +345,7 @@ def _get_annual_rule(self) -> str | None: if pos_check is None: return None else: - return {"cs": "AS", "bs": "BAS", "ce": "Y", "be": "BA"}.get(pos_check) + return {"cs": "YS", "bs": "BYS", "ce": "Y", "be": "BY"}.get(pos_check) def _get_quarterly_rule(self) -> str | None: if len(self.mdiffs) > 1: