Skip to content

Commit 31c661f

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into json_overflow
2 parents dd3cc48 + c9a98f0 commit 31c661f

32 files changed

+183
-129
lines changed

doc/source/user_guide/timeseries.rst

+7-7
Original file line numberDiff line numberDiff line change
@@ -896,9 +896,9 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
896896
:class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
897897
:class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
898898
:class:`~pandas.tseries.offsets.YearEnd`, ``'Y'``, "calendar year end"
899-
:class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin"
900-
:class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end"
901-
:class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin"
899+
:class:`~pandas.tseries.offsets.YearBegin`, ``'YS'`` or ``'BYS'``,"calendar year begin"
900+
:class:`~pandas.tseries.offsets.BYearEnd`, ``'BY'``, "business year end"
901+
:class:`~pandas.tseries.offsets.BYearBegin`, ``'BYS'``, "business year begin"
902902
:class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year"
903903
:class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday"
904904
:class:`~pandas.tseries.offsets.BusinessHour`, ``'bh'``, "business hour"
@@ -1259,9 +1259,9 @@ frequencies. We will refer to these aliases as *offset aliases*.
12591259
"QS", "quarter start frequency"
12601260
"BQS", "business quarter start frequency"
12611261
"Y", "year end frequency"
1262-
"BA, BY", "business year end frequency"
1263-
"AS, YS", "year start frequency"
1264-
"BAS, BYS", "business year start frequency"
1262+
"BY", "business year end frequency"
1263+
"YS", "year start frequency"
1264+
"BYS", "business year start frequency"
12651265
"h", "hourly frequency"
12661266
"bh", "business hour frequency"
12671267
"cbh", "custom business hour frequency"
@@ -1692,7 +1692,7 @@ the end of the interval.
16921692
.. warning::
16931693

16941694
The default values for ``label`` and ``closed`` are '**left**' for all
1695-
frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BA', 'BQ', and 'W'
1695+
frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W'
16961696
which all have a default of 'right'.
16971697

16981698
This might unintentionally lead to looking ahead, where the value for a later

doc/source/whatsnew/v0.20.0.rst

+16-4
Original file line numberDiff line numberDiff line change
@@ -886,11 +886,23 @@ This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622
886886

887887
This is *unchanged* from prior versions, but shown for illustration purposes:
888888

889-
.. ipython:: python
889+
.. code-block:: python
890890
891-
df = pd.DataFrame(np.arange(6), columns=['value'],
892-
index=pd.MultiIndex.from_product([list('BA'), range(3)]))
893-
df
891+
In [81]: df = pd.DataFrame(np.arange(6), columns=['value'],
892+
....: index=pd.MultiIndex.from_product([list('BA'), range(3)]))
893+
....:
894+
In [82]: df
895+
896+
Out[82]:
897+
value
898+
B 0 0
899+
1 1
900+
2 2
901+
A 0 3
902+
1 4
903+
2 5
904+
905+
[6 rows x 1 columns]
894906
895907
.. code-block:: python
896908

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ Other
395395
^^^^^
396396
- Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`)
397397
- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
398+
- Bug in rendering ``inf`` values inside a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`)
398399
- Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`)
399400
-
400401

pandas/_libs/missing.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,3 @@ def isneginf_scalar(val: object) -> bool: ...
1414
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
1515
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
1616
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
17-
def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...

pandas/_libs/missing.pyx

-25
Original file line numberDiff line numberDiff line change
@@ -255,31 +255,6 @@ cdef bint checknull_with_nat_and_na(object obj):
255255
return checknull_with_nat(obj) or obj is C_NA
256256

257257

258-
@cython.wraparound(False)
259-
@cython.boundscheck(False)
260-
def is_float_nan(values: ndarray) -> ndarray:
261-
"""
262-
True for elements which correspond to a float nan
263-
264-
Returns
265-
-------
266-
ndarray[bool]
267-
"""
268-
cdef:
269-
ndarray[uint8_t] result
270-
Py_ssize_t i, N
271-
object val
272-
273-
N = len(values)
274-
result = np.zeros(N, dtype=np.uint8)
275-
276-
for i in range(N):
277-
val = values[i]
278-
if util.is_nan(val):
279-
result[i] = True
280-
return result.view(bool)
281-
282-
283258
@cython.wraparound(False)
284259
@cython.boundscheck(False)
285260
def is_numeric_na(values: ndarray) -> ndarray:

pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -825,12 +825,12 @@ int Buffer_AppendLongDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
825825
precision_str[0] = '%';
826826
precision_str[1] = '.';
827827
#if defined(_WIN32) && defined(_MSC_VER)
828-
sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug",
828+
sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%uLg",
829829
enc->doublePrecision);
830830
enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str,
831831
neg ? -value : value);
832832
#else
833-
snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug",
833+
snprintf(precision_str + 2, sizeof(precision_str) - 2, "%uLg",
834834
enc->doublePrecision);
835835
enc->offset += snprintf(str, enc->end - enc->offset, precision_str,
836836
neg ? -value : value);

pandas/_libs/tslibs/dtypes.pyx

+40-4
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
192192
"BQS": "Q",
193193
"QS": "Q",
194194
"BQ": "Q",
195-
"BA": "Y",
196-
"AS": "Y",
197-
"BAS": "Y",
198195
"MS": "M",
199196
"D": "D",
200197
"B": "B",
@@ -205,9 +202,9 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
205202
"ns": "ns",
206203
"h": "h",
207204
"Q": "Q",
208-
"Y": "Y",
209205
"W": "W",
210206
"ME": "M",
207+
"Y": "Y",
211208
"BY": "Y",
212209
"YS": "Y",
213210
"BYS": "Y",
@@ -244,6 +241,45 @@ DEPR_ABBREVS: dict[str, str]= {
244241
"A-SEP": "Y-SEP",
245242
"A-OCT": "Y-OCT",
246243
"A-NOV": "Y-NOV",
244+
"BA": "BY",
245+
"BA-DEC": "BY-DEC",
246+
"BA-JAN": "BY-JAN",
247+
"BA-FEB": "BY-FEB",
248+
"BA-MAR": "BY-MAR",
249+
"BA-APR": "BY-APR",
250+
"BA-MAY": "BY-MAY",
251+
"BA-JUN": "BY-JUN",
252+
"BA-JUL": "BY-JUL",
253+
"BA-AUG": "BY-AUG",
254+
"BA-SEP": "BY-SEP",
255+
"BA-OCT": "BY-OCT",
256+
"BA-NOV": "BY-NOV",
257+
"AS": "YS",
258+
"AS-DEC": "YS-DEC",
259+
"AS-JAN": "YS-JAN",
260+
"AS-FEB": "YS-FEB",
261+
"AS-MAR": "YS-MAR",
262+
"AS-APR": "YS-APR",
263+
"AS-MAY": "YS-MAY",
264+
"AS-JUN": "YS-JUN",
265+
"AS-JUL": "YS-JUL",
266+
"AS-AUG": "YS-AUG",
267+
"AS-SEP": "YS-SEP",
268+
"AS-OCT": "YS-OCT",
269+
"AS-NOV": "YS-NOV",
270+
"BAS": "BYS",
271+
"BAS-DEC": "BYS-DEC",
272+
"BAS-JAN": "BYS-JAN",
273+
"BAS-FEB": "BYS-FEB",
274+
"BAS-MAR": "BYS-MAR",
275+
"BAS-APR": "BYS-APR",
276+
"BAS-MAY": "BYS-MAY",
277+
"BAS-JUN": "BYS-JUN",
278+
"BAS-JUL": "BYS-JUL",
279+
"BAS-AUG": "BYS-AUG",
280+
"BAS-SEP": "BYS-SEP",
281+
"BAS-OCT": "BYS-OCT",
282+
"BAS-NOV": "BYS-NOV",
247283
"H": "h",
248284
"BH": "bh",
249285
"CBH": "cbh",

pandas/_libs/tslibs/fields.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,8 @@ def get_start_end_field(
253253
# month of year. Other offsets use month, startingMonth as ending
254254
# month of year.
255255

256-
if (freqstr[0:2] in ["MS", "QS", "AS"]) or (
257-
freqstr[1:3] in ["MS", "QS", "AS"]):
256+
if (freqstr[0:2] in ["MS", "QS", "YS"]) or (
257+
freqstr[1:3] in ["MS", "QS", "YS"]):
258258
end_month = 12 if month_kw == 1 else month_kw - 1
259259
start_month = month_kw
260260
else:

pandas/_libs/tslibs/offsets.pyx

+9-12
Original file line numberDiff line numberDiff line change
@@ -2414,7 +2414,7 @@ cdef class BYearEnd(YearOffset):
24142414

24152415
_outputName = "BusinessYearEnd"
24162416
_default_month = 12
2417-
_prefix = "BA"
2417+
_prefix = "BY"
24182418
_day_opt = "business_end"
24192419

24202420

@@ -2453,7 +2453,7 @@ cdef class BYearBegin(YearOffset):
24532453

24542454
_outputName = "BusinessYearBegin"
24552455
_default_month = 1
2456-
_prefix = "BAS"
2456+
_prefix = "BYS"
24572457
_day_opt = "business_start"
24582458

24592459

@@ -2552,7 +2552,7 @@ cdef class YearBegin(YearOffset):
25522552
"""
25532553

25542554
_default_month = 1
2555-
_prefix = "AS"
2555+
_prefix = "YS"
25562556
_day_opt = "start"
25572557

25582558

@@ -4540,10 +4540,10 @@ CDay = CustomBusinessDay
45404540
prefix_mapping = {
45414541
offset._prefix: offset
45424542
for offset in [
4543-
YearBegin, # 'AS'
4543+
YearBegin, # 'YS'
45444544
YearEnd, # 'Y'
4545-
BYearBegin, # 'BAS'
4546-
BYearEnd, # 'BA'
4545+
BYearBegin, # 'BYS'
4546+
BYearEnd, # 'BY'
45474547
BusinessDay, # 'B'
45484548
BusinessMonthBegin, # 'BMS'
45494549
BusinessMonthEnd, # 'BM'
@@ -4584,12 +4584,9 @@ _lite_rule_alias = {
45844584
"Q": "Q-DEC",
45854585

45864586
"Y": "Y-DEC", # YearEnd(month=12),
4587-
"AS": "AS-JAN", # YearBegin(month=1),
4588-
"YS": "AS-JAN",
4589-
"BA": "BA-DEC", # BYearEnd(month=12),
4590-
"BY": "BA-DEC",
4591-
"BAS": "BAS-JAN", # BYearBegin(month=1),
4592-
"BYS": "BAS-JAN",
4587+
"YS": "YS-JAN", # YearBegin(month=1),
4588+
"BY": "BY-DEC", # BYearEnd(month=12),
4589+
"BYS": "BYS-JAN", # BYearBegin(month=1),
45934590

45944591
"Min": "min",
45954592
"min": "min",

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2526,7 +2526,7 @@ def _round_temporally(
25262526
raise ValueError(f"Must specify a valid frequency: {freq}")
25272527
pa_supported_unit = {
25282528
"Y": "year",
2529-
"AS": "year",
2529+
"YS": "year",
25302530
"Q": "quarter",
25312531
"QS": "quarter",
25322532
"M": "month",

pandas/core/arrays/base.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -1663,7 +1663,14 @@ def __repr__(self) -> str:
16631663
self, self._formatter(), indent_for_name=False
16641664
).rstrip(", \n")
16651665
class_name = f"<{type(self).__name__}>\n"
1666-
return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
1666+
footer = self._get_repr_footer()
1667+
return f"{class_name}{data}\n{footer}"
1668+
1669+
def _get_repr_footer(self) -> str:
1670+
# GH#24278
1671+
if self.ndim > 1:
1672+
return f"Shape: {self.shape}, dtype: {self.dtype}"
1673+
return f"Length: {len(self)}, dtype: {self.dtype}"
16671674

16681675
def _repr_2d(self) -> str:
16691676
from pandas.io.formats.printing import format_object_summary
@@ -1679,7 +1686,8 @@ def _repr_2d(self) -> str:
16791686
]
16801687
data = ",\n".join(lines)
16811688
class_name = f"<{type(self).__name__}>"
1682-
return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
1689+
footer = self._get_repr_footer()
1690+
return f"{class_name}\n[\n{data}\n]\n{footer}"
16831691

16841692
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
16851693
"""

pandas/core/arrays/categorical.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2177,7 +2177,7 @@ def _repr_categories(self) -> list[str]:
21772177
category_strs = [x.strip() for x in category_strs]
21782178
return category_strs
21792179

2180-
def _repr_categories_info(self) -> str:
2180+
def _get_repr_footer(self) -> str:
21812181
"""
21822182
Returns a string representation of the footer.
21832183
"""
@@ -2229,7 +2229,7 @@ def __repr__(self) -> str:
22292229
"""
22302230
String representation.
22312231
"""
2232-
footer = self._repr_categories_info()
2232+
footer = self._get_repr_footer()
22332233
length = len(self)
22342234
max_len = 10
22352235
if length > max_len:

pandas/core/indexes/base.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
is_datetime_array,
3838
no_default,
3939
)
40-
from pandas._libs.missing import is_float_nan
4140
from pandas._libs.tslibs import (
4241
IncompatibleFrequency,
4342
OutOfBoundsDatetime,
@@ -1390,16 +1389,8 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str
13901389

13911390
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
13921391
values = np.asarray(values)
1393-
values = lib.maybe_convert_objects(values, safe=True)
1394-
1395-
result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
1396-
1397-
# could have nans
1398-
mask = is_float_nan(values)
1399-
if mask.any():
1400-
result_arr = np.array(result)
1401-
result_arr[mask] = na_rep
1402-
result = result_arr.tolist()
1392+
# TODO: why do we need different justify for these cases?
1393+
result = trim_front(format_array(values, None, justify="all"))
14031394
else:
14041395
result = trim_front(format_array(values, None, justify="left"))
14051396
return header + result

pandas/core/indexes/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -992,11 +992,11 @@ def date_range(
992992
993993
**Specify a unit**
994994
995-
>>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
995+
>>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s")
996996
DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
997997
'2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
998998
'2817-01-01', '2917-01-01'],
999-
dtype='datetime64[s]', freq='100AS-JAN')
999+
dtype='datetime64[s]', freq='100YS-JAN')
10001000
"""
10011001
if freq is None and com.any_none(periods, start, end):
10021002
freq = "D"

pandas/core/resample.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2101,7 +2101,7 @@ def __init__(
21012101
else:
21022102
freq = to_offset(freq)
21032103

2104-
end_types = {"ME", "Y", "Q", "BM", "BA", "BQ", "W"}
2104+
end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"}
21052105
rule = freq.rule_code
21062106
if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
21072107
if closed is None:
@@ -2299,7 +2299,7 @@ def _adjust_bin_edges(
22992299

23002300
if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in (
23012301
"BQ",
2302-
"BA",
2302+
"BY",
23032303
"Q",
23042304
"Y",
23052305
"W",

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5729,7 +5729,7 @@ def to_timestamp(
57295729
2023-01-01 1
57305730
2024-01-01 2
57315731
2025-01-01 3
5732-
Freq: AS-JAN, dtype: int64
5732+
Freq: YS-JAN, dtype: int64
57335733
57345734
Using `freq` which is the offset that the Timestamps will have
57355735

0 commit comments

Comments
 (0)