DEPR: ‘AS’, ‘BA’ and ‘BAS’ in favour of ‘YS’, ‘BY’ and ‘BYS’ (pandas-dev#55479)

natmokval · gupta-paras · commit ee4a39a4a70b · 2023-10-12T16:20:03.000+05:30
* deprecate AS/BA/BAS in favour of YS/BY/BYS, fix tests

* correct def get_start_end_field, correct docstrings and v0.20.0.rst

* fix pre-commit error

* add deprecated frequencies to dict DEPR_ABBREVS, add tests
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -896,9 +896,9 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
     :class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
     :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
     :class:`~pandas.tseries.offsets.YearEnd`, ``'Y'``, "calendar year end"
-    :class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin"
-    :class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end"
-    :class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin"
+    :class:`~pandas.tseries.offsets.YearBegin`, ``'YS'``, "calendar year begin"
+    :class:`~pandas.tseries.offsets.BYearEnd`, ``'BY'``, "business year end"
+    :class:`~pandas.tseries.offsets.BYearBegin`, ``'BYS'``, "business year begin"
     :class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year"
     :class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday"
     :class:`~pandas.tseries.offsets.BusinessHour`, ``'bh'``, "business hour"
@@ -1259,9 +1259,9 @@ frequencies. We will refer to these aliases as *offset aliases*.
     "QS", "quarter start frequency"
     "BQS", "business quarter start frequency"
     "Y", "year end frequency"
-    "BA, BY", "business year end frequency"
-    "AS, YS", "year start frequency"
-    "BAS, BYS", "business year start frequency"
+    "BY", "business year end frequency"
+    "YS", "year start frequency"
+    "BYS", "business year start frequency"
     "h", "hourly frequency"
     "bh", "business hour frequency"
     "cbh", "custom business hour frequency"
@@ -1692,7 +1692,7 @@ the end of the interval.
 .. warning::
 
     The default values for ``label`` and ``closed`` is '**left**' for all
-    frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BA', 'BQ', and 'W'
+    frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W'
     which all have a default of 'right'.
 
     This might unintendedly lead to looking ahead, where the value for a later
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
@@ -886,11 +886,23 @@ This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622
 
 This is *unchanged* from prior versions, but shown for illustration purposes:
 
-.. ipython:: python
+.. code-block:: python
 
-   df = pd.DataFrame(np.arange(6), columns=['value'],
-                     index=pd.MultiIndex.from_product([list('BA'), range(3)]))
-   df
+   In [81]: df = pd.DataFrame(np.arange(6), columns=['value'],
+      ....:                   index=pd.MultiIndex.from_product([list('BA'), range(3)]))
+      ....:
+   In [82]: df
+
+   Out[82]:
+        value
+   B 0      0
+     1      1
+     2      2
+   A 0      3
+     1      4
+     2      5
+
+   [6 rows x 1 columns]
 
 .. code-block:: python
 
diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -1606,8 +1606,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
         tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
         return;
     } else if (PyArray_IsScalar(obj, Float) ||
-               PyArray_IsScalar(obj, Double) ||
-               PyArray_IsScalar(obj, LongDouble)) {
+               PyArray_IsScalar(obj, Double)) {
+        PyArray_CastScalarToCtype(obj, &(pc->longDoubleValue),
+                                  PyArray_DescrFromType(NPY_DOUBLE));
+        tc->type = JT_LONG_DOUBLE;
+        return;
+    } else if (PyArray_IsScalar(obj, LongDouble)) {
         PyArray_CastScalarToCtype(obj, &(pc->longDoubleValue),
                                   PyArray_DescrFromType(NPY_LONGDOUBLE));
         tc->type = JT_LONG_DOUBLE;
diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx
@@ -192,9 +192,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
     "BQS": "Q",
     "QS": "Q",
     "BQ": "Q",
-    "BA": "Y",
-    "AS": "Y",
-    "BAS": "Y",
     "MS": "M",
     "D": "D",
     "B": "B",
@@ -205,9 +202,9 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
     "ns": "ns",
     "h": "h",
     "Q": "Q",
-    "Y": "Y",
     "W": "W",
     "ME": "M",
+    "Y": "Y",
     "BY": "Y",
     "YS": "Y",
     "BYS": "Y",
@@ -244,6 +241,45 @@ DEPR_ABBREVS: dict[str, str]= {
     "A-SEP": "Y-SEP",
     "A-OCT": "Y-OCT",
     "A-NOV": "Y-NOV",
+    "BA": "BY",
+    "BA-DEC": "BY-DEC",
+    "BA-JAN": "BY-JAN",
+    "BA-FEB": "BY-FEB",
+    "BA-MAR": "BY-MAR",
+    "BA-APR": "BY-APR",
+    "BA-MAY": "BY-MAY",
+    "BA-JUN": "BY-JUN",
+    "BA-JUL": "BY-JUL",
+    "BA-AUG": "BY-AUG",
+    "BA-SEP": "BY-SEP",
+    "BA-OCT": "BY-OCT",
+    "BA-NOV": "BY-NOV",
+    "AS": "YS",
+    "AS-DEC": "YS-DEC",
+    "AS-JAN": "YS-JAN",
+    "AS-FEB": "YS-FEB",
+    "AS-MAR": "YS-MAR",
+    "AS-APR": "YS-APR",
+    "AS-MAY": "YS-MAY",
+    "AS-JUN": "YS-JUN",
+    "AS-JUL": "YS-JUL",
+    "AS-AUG": "YS-AUG",
+    "AS-SEP": "YS-SEP",
+    "AS-OCT": "YS-OCT",
+    "AS-NOV": "YS-NOV",
+    "BAS": "BYS",
+    "BAS-DEC": "BYS-DEC",
+    "BAS-JAN": "BYS-JAN",
+    "BAS-FEB": "BYS-FEB",
+    "BAS-MAR": "BYS-MAR",
+    "BAS-APR": "BYS-APR",
+    "BAS-MAY": "BYS-MAY",
+    "BAS-JUN": "BYS-JUN",
+    "BAS-JUL": "BYS-JUL",
+    "BAS-AUG": "BYS-AUG",
+    "BAS-SEP": "BYS-SEP",
+    "BAS-OCT": "BYS-OCT",
+    "BAS-NOV": "BYS-NOV",
     "H": "h",
     "BH": "bh",
     "CBH": "cbh",
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
@@ -253,8 +253,8 @@ def get_start_end_field(
         # month of year. Other offsets use month, startingMonth as ending
         # month of year.
 
-        if (freqstr[0:2] in ["MS", "QS", "AS"]) or (
-                freqstr[1:3] in ["MS", "QS", "AS"]):
+        if (freqstr[0:2] in ["MS", "QS", "YS"]) or (
+                freqstr[1:3] in ["MS", "QS", "YS"]):
             end_month = 12 if month_kw == 1 else month_kw - 1
             start_month = month_kw
         else:
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -2414,7 +2414,7 @@ cdef class BYearEnd(YearOffset):
 
     _outputName = "BusinessYearEnd"
     _default_month = 12
-    _prefix = "BA"
+    _prefix = "BY"
     _day_opt = "business_end"
 
 
@@ -2453,7 +2453,7 @@ cdef class BYearBegin(YearOffset):
 
     _outputName = "BusinessYearBegin"
     _default_month = 1
-    _prefix = "BAS"
+    _prefix = "BYS"
     _day_opt = "business_start"
 
 
@@ -2552,7 +2552,7 @@ cdef class YearBegin(YearOffset):
     """
 
     _default_month = 1
-    _prefix = "AS"
+    _prefix = "YS"
     _day_opt = "start"
 
 
@@ -4540,10 +4540,10 @@ CDay = CustomBusinessDay
 prefix_mapping = {
     offset._prefix: offset
     for offset in [
-        YearBegin,  # 'AS'
+        YearBegin,  # 'YS'
         YearEnd,  # 'Y'
-        BYearBegin,  # 'BAS'
-        BYearEnd,  # 'BA'
+        BYearBegin,  # 'BYS'
+        BYearEnd,  # 'BY'
         BusinessDay,  # 'B'
         BusinessMonthBegin,  # 'BMS'
         BusinessMonthEnd,  # 'BM'
@@ -4584,12 +4584,9 @@ _lite_rule_alias = {
     "Q": "Q-DEC",
 
     "Y": "Y-DEC",      # YearEnd(month=12),
-    "AS": "AS-JAN",    # YearBegin(month=1),
-    "YS": "AS-JAN",
-    "BA": "BA-DEC",    # BYearEnd(month=12),
-    "BY": "BA-DEC",
-    "BAS": "BAS-JAN",  # BYearBegin(month=1),
-    "BYS": "BAS-JAN",
+    "YS": "YS-JAN",    # YearBegin(month=1),
+    "BY": "BY-DEC",    # BYearEnd(month=12),
+    "BYS": "BYS-JAN",  # BYearBegin(month=1),
 
     "Min": "min",
     "min": "min",
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -2526,7 +2526,7 @@ def _round_temporally(
             raise ValueError(f"Must specify a valid frequency: {freq}")
         pa_supported_unit = {
             "Y": "year",
-            "AS": "year",
+            "YS": "year",
             "Q": "quarter",
             "QS": "quarter",
             "M": "month",
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -992,11 +992,11 @@ def date_range(
 
     **Specify a unit**
 
-    >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
+    >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s")
     DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
                    '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
                    '2817-01-01', '2917-01-01'],
-                  dtype='datetime64[s]', freq='100AS-JAN')
+                  dtype='datetime64[s]', freq='100YS-JAN')
     """
     if freq is None and com.any_none(periods, start, end):
         freq = "D"
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -2101,7 +2101,7 @@ def __init__(
         else:
             freq = to_offset(freq)
 
-        end_types = {"ME", "Y", "Q", "BM", "BA", "BQ", "W"}
+        end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"}
         rule = freq.rule_code
         if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
             if closed is None:
@@ -2299,7 +2299,7 @@ def _adjust_bin_edges(
 
         if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in (
             "BQ",
-            "BA",
+            "BY",
             "Q",
             "Y",
             "W",
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -5729,7 +5729,7 @@ def to_timestamp(
         2023-01-01    1
         2024-01-01    2
         2025-01-01    3
-        Freq: AS-JAN, dtype: int64
+        Freq: YS-JAN, dtype: int64
 
         Using `freq` which is the offset that the Timestamps will have
 
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
@@ -1594,7 +1594,7 @@ def test_dt64arr_add_sub_offset_array(
                     Timestamp("2016-04-01"),
                     Timestamp("2017-04-01"),
                 ],
-                "AS-APR",
+                "YS-APR",
             ),
             (
                 "__sub__",
@@ -1616,7 +1616,7 @@ def test_dt64arr_add_sub_offset_array(
                     Timestamp("2015-10-01"),
                     Timestamp("2016-10-01"),
                 ],
-                "AS-OCT",
+                "YS-OCT",
             ),
         ],
     )
@@ -1625,7 +1625,7 @@ def test_dti_add_sub_nonzero_mth_offset(
     ):
         # GH 26258
         tz = tz_aware_fixture
-        date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="AS", tz=tz)
+        date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="YS", tz=tz)
         date = tm.box_expected(date, box_with_array, False)
         mth = getattr(date, op)
         result = mth(offset)
diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py
@@ -44,7 +44,7 @@ def test_to_timestamp(self, frame_or_series):
         if frame_or_series is Series:
             assert result.name == "A"
 
-        exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
+        exp_index = date_range("1/1/2001", end="1/1/2009", freq="YS-JAN")
         result = obj.to_timestamp("D", "start")
         tm.assert_index_equal(result.index, exp_index)
 
@@ -88,7 +88,7 @@ def test_to_timestamp_columns(self):
         tm.assert_index_equal(result.columns, exp_index)
         tm.assert_numpy_array_equal(result.values, df.values)
 
-        exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
+        exp_index = date_range("1/1/2001", end="1/1/2009", freq="YS-JAN")
         result = df.to_timestamp("D", "start", axis=1)
         tm.assert_index_equal(result.columns, exp_index)
 
@@ -112,14 +112,14 @@ def test_to_timestamp_columns(self):
 
         result1 = df.to_timestamp("5min", axis=1)
         result2 = df.to_timestamp("min", axis=1)
-        expected = date_range("2001-01-01", "2009-01-01", freq="AS")
+        expected = date_range("2001-01-01", "2009-01-01", freq="YS")
         assert isinstance(result1.columns, DatetimeIndex)
         assert isinstance(result2.columns, DatetimeIndex)
         tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8)
         tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8)
         # PeriodIndex.to_timestamp always use 'infer'
-        assert result1.columns.freqstr == "AS-JAN"
-        assert result2.columns.freqstr == "AS-JAN"
+        assert result1.columns.freqstr == "YS-JAN"
+        assert result2.columns.freqstr == "YS-JAN"
 
     def test_to_timestamp_invalid_axis(self):
         index = period_range(freq="Y", start="1/1/2001", end="12/1/2009")
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -734,7 +734,7 @@ def test_list_grouper_with_nat(self):
         # GH 14715
         df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
         df.iloc[-1] = pd.NaT
-        grouper = Grouper(key="date", freq="AS")
+        grouper = Grouper(key="date", freq="YS")
 
         # Grouper in a list grouping
         result = df.groupby([grouper])
diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py
@@ -56,7 +56,7 @@ def test_to_period_quarterlyish(self, off):
         prng = rng.to_period()
         assert prng.freq == "Q-DEC"
 
-    @pytest.mark.parametrize("off", ["BA", "AS", "BAS"])
+    @pytest.mark.parametrize("off", ["BY", "YS", "BYS"])
     def test_to_period_annualish(self, off):
         rng = date_range("01-Jan-2012", periods=8, freq=off)
         prng = rng.to_period()
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -648,7 +648,7 @@ def test_constructor_coverage(self):
         with pytest.raises(ValueError, match=msg):
             date_range(periods=10, freq="D")
 
-    @pytest.mark.parametrize("freq", ["AS", "W-SUN"])
+    @pytest.mark.parametrize("freq", ["YS", "W-SUN"])
     def test_constructor_datetime64_tzformat(self, freq):
         # see GH#6572: ISO 8601 format results in stdlib timezone object
         idx = date_range(
@@ -981,8 +981,8 @@ def test_dti_constructor_years_only(self, tz_naive_fixture):
         rng3 = date_range("2014", "2020", freq="Y", tz=tz)
         expected3 = date_range("2014-12-31", "2019-12-31", freq="Y", tz=tz)
 
-        rng4 = date_range("2014", "2020", freq="AS", tz=tz)
-        expected4 = date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz)
+        rng4 = date_range("2014", "2020", freq="YS", tz=tz)
+        expected4 = date_range("2014-01-01", "2020-01-01", freq="YS", tz=tz)
 
         for rng, expected in [
             (rng1, expected1),
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -243,13 +243,12 @@ def test_date_range_gen_error(self):
         rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min")
         assert len(rng) == 4
 
-    @pytest.mark.parametrize("freq", ["AS", "YS"])
-    def test_begin_year_alias(self, freq):
+    def test_begin_year_alias(self):
         # see gh-9313
-        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
+        rng = date_range("1/1/2013", "7/1/2017", freq="YS")
         exp = DatetimeIndex(
             ["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"],
-            freq=freq,
+            freq="YS",
         )
         tm.assert_index_equal(rng, exp)
 
@@ -261,12 +260,11 @@ def test_end_year_alias(self):
         )
         tm.assert_index_equal(rng, exp)
 
-    @pytest.mark.parametrize("freq", ["BA", "BY"])
-    def test_business_end_year_alias(self, freq):
+    def test_business_end_year_alias(self):
         # see gh-9313
-        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
+        rng = date_range("1/1/2013", "7/1/2017", freq="BY")
         exp = DatetimeIndex(
-            ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq
+            ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq="BY"
         )
         tm.assert_index_equal(rng, exp)
 
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py
diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py