
Commit f7df8bf

CLN: assorted (#51775)
* CLN: assorted
* more specific
* more accurate
1 parent 4d10233 commit f7df8bf

32 files changed, +111 -99 lines changed

doc/source/whatsnew/v2.0.0.rst (-1)

@@ -790,7 +790,6 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecated parsing datetime strings with system-local timezone to ``tzlocal``, pass a ``tz`` keyword or explicitly call ``tz_localize`` instead (:issue:`50791`)
-- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
 - Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings, in a future version these will be parsed as datetimes (matching unit-less behavior) instead of cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`)
 - Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`)

doc/source/whatsnew/v2.1.0.rst (+1)

@@ -92,6 +92,7 @@ Other API changes

 Deprecations
 ~~~~~~~~~~~~
+- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)

pandas/_libs/arrays.pyx (+3 -2)

@@ -126,11 +126,12 @@ cdef class NDArrayBacked:

     @property
     def size(self) -> int:
-        return self._ndarray.size
+        # TODO(cython3): use self._ndarray.size
+        return cnp.PyArray_SIZE(self._ndarray)

     @property
     def nbytes(self) -> int:
-        return self._ndarray.nbytes
+        return cnp.PyArray_NBYTES(self._ndarray)

     def copy(self, order="C"):
         cdef:
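
The NumPy C-API calls above compute the same quantities as the Python-level attributes while skipping Python attribute lookup. A quick Python-level check of that equivalence (illustrative only, not the Cython code itself):

    import numpy as np

    arr = np.empty((3, 4), dtype=np.int64)
    assert arr.size == 3 * 4            # what cnp.PyArray_SIZE(arr) returns at the C level
    assert arr.nbytes == arr.size * 8   # what cnp.PyArray_NBYTES(arr) returns at the C level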

pandas/_libs/parsers.pyi (+7)

@@ -67,3 +67,10 @@ class TextReader:
     def close(self) -> None: ...
     def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ...
     def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ...
+
+# _maybe_upcast, na_values are only exposed for testing
+na_values: dict
+
+def _maybe_upcast(
+    arr, use_nullable_dtypes: bool = ..., dtype_backend: str = ...
+) -> np.ndarray: ...

pandas/_libs/tslib.pyx (+1 -1)

@@ -695,7 +695,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
         if ts is NaT:
             ival = NPY_NAT
         else:
-            if ts.tz is not None:
+            if ts.tzinfo is not None:
                 ts = ts.tz_convert(tz)
             else:
                 # datetime64, tznaive pydatetime, int, float
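
On a Timestamp, .tz is simply a property alias for the underlying .tzinfo attribute, so checking .tzinfo directly is equivalent and a bit more direct. A minimal illustration:

    import pandas as pd

    aware = pd.Timestamp("2023-01-01", tz="US/Eastern")
    naive = pd.Timestamp("2023-01-01")
    assert aware.tzinfo is not None and aware.tz is aware.tzinfo
    assert naive.tzinfo is None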

pandas/_libs/tslibs/conversion.pyx (+3)

@@ -176,6 +176,9 @@ cpdef inline (int64_t, int) precision_from_unit(
         multiplier = periods_per_second(out_reso)
         m = multiplier * 2629746
     else:
+        # Careful: if get_conversion_factor raises, the exception does
+        # not propagate, instead we get a warning about an ignored exception.
+        # https://github.com/pandas-dev/pandas/pull/51483#discussion_r1115198951
         m = get_conversion_factor(reso, out_reso)

     p = <int>log10(m)  # number of digits in 'm' minus 1

pandas/_libs/tslibs/offsets.pyx (+1 -1)

@@ -4096,7 +4096,7 @@ cpdef to_offset(freq):

     Returns
     -------
-    DateOffset or None
+    BaseOffset subclass or None

     Raises
     ------
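
The docstring fix reflects what to_offset actually returns: a concrete BaseOffset subclass (DateOffset is only one of them), or None when given None. A quick check:

    from pandas.tseries.frequencies import to_offset
    from pandas.tseries.offsets import BaseOffset, Minute

    off = to_offset("5min")
    assert isinstance(off, Minute) and isinstance(off, BaseOffset)
    assert to_offset(None) is None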

pandas/_libs/tslibs/src/datetime/np_datetime.c (-23)

@@ -224,29 +224,6 @@ static npy_int64 days_to_yearsdays(npy_int64 *days_) {
     return year + 2000;
 }

-/*
- * Adjusts a datetimestruct based on a seconds offset. Assumes
- * the current values are valid.
- */
-NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts,
-                                                 int seconds) {
-    int minutes;
-
-    dts->sec += seconds;
-    if (dts->sec < 0) {
-        minutes = dts->sec / 60;
-        dts->sec = dts->sec % 60;
-        if (dts->sec < 0) {
-            --minutes;
-            dts->sec += 60;
-        }
-        add_minutes_to_datetimestruct(dts, minutes);
-    } else if (dts->sec >= 60) {
-        minutes = dts->sec / 60;
-        dts->sec = dts->sec % 60;
-        add_minutes_to_datetimestruct(dts, minutes);
-    }
-}

 /*
  * Fills in the year, month, day in 'dts' based on the days

pandas/_libs/tslibs/tzconversion.pyx (-1)

@@ -153,7 +153,6 @@ cdef int64_t tz_localize_to_utc_single(
         return val

     elif is_utc(tz) or tz is None:
-        # TODO: test with non-nano
         return val

     elif is_tzlocal(tz):

pandas/_testing/asserters.py (+2 -2)

@@ -1179,8 +1179,8 @@ def assert_frame_equal(

     # compare by blocks
     if by_blocks:
-        rblocks = right._to_dict_of_blocks()
-        lblocks = left._to_dict_of_blocks()
+        rblocks = right._to_dict_of_blocks(copy=False)
+        lblocks = left._to_dict_of_blocks(copy=False)
         for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
             assert dtype in lblocks
             assert dtype in rblocks
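
Passing copy=False only changes how the internal blocks are materialized for the comparison; the public by_blocks behavior is unchanged. A minimal sketch of the code path being exercised:

    import pandas as pd
    import pandas._testing as tm

    left = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
    right = left.copy()
    # compares the underlying dtype blocks rather than column-by-column
    tm.assert_frame_equal(left, right, by_blocks=True)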

pandas/conftest.py (+5 -3)

@@ -128,6 +128,10 @@ def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:


 def pytest_collection_modifyitems(items, config) -> None:
+    is_doctest = config.getoption("--doctest-modules") or config.getoption(
+        "--doctest-cython", default=False
+    )
+
     # Warnings from doctests that can be ignored; place reason in comment above.
     # Each entry specifies (path, message) - see the ignore_doctest_warning function
     ignored_doctest_warnings = [
@@ -136,9 +140,7 @@ def pytest_collection_modifyitems(items, config) -> None:
     ]

     for item in items:
-        if config.getoption("--doctest-modules") or config.getoption(
-            "--doctest-cython", default=False
-        ):
+        if is_doctest:
             # autouse=True for the add_doctest_imports can lead to expensive teardowns
             # since doctest_namespace is a session fixture
             item.add_marker(pytest.mark.usefixtures("add_doctest_imports"))
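
The cleanup hoists a loop-invariant option lookup out of the per-item loop, which matters because pytest_collection_modifyitems visits every collected test. A generic sketch of the same pattern, using a hypothetical --run-slow option rather than pandas' real flags:

    import pytest

    def pytest_collection_modifyitems(items, config) -> None:
        # Evaluate loop-invariant configuration once, not once per collected item.
        # "--run-slow" is a made-up option for illustration.
        run_slow = config.getoption("--run-slow", default=False)
        for item in items:
            if not run_slow and "slow" in item.keywords:
                item.add_marker(pytest.mark.skip(reason="need --run-slow to run"))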

pandas/core/arrays/period.py (+2)

@@ -760,6 +760,8 @@ def _add_timedelta_arraylike(

         dtype = np.dtype(f"m8[{freq._td64_unit}]")

+        # Similar to _check_timedeltalike_freq_compat, but we raise with a
+        # more specific exception message if necessary.
         try:
             delta = astype_overflowsafe(
                 np.asarray(other), dtype=dtype, copy=False, round_ok=False

pandas/core/arrays/timedeltas.py (+4 -2)

@@ -63,12 +63,14 @@
 )
 from pandas.core.dtypes.missing import isna

-from pandas.core import nanops
+from pandas.core import (
+    nanops,
+    roperator,
+)
 from pandas.core.array_algos import datetimelike_accumulations
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
 import pandas.core.common as com
-from pandas.core.ops import roperator
 from pandas.core.ops.common import unpack_zerodim_and_defer

 if TYPE_CHECKING:
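
This commit consistently switches roperator imports from pandas.core.ops to pandas.core (the same change recurs in array_ops.py, methods.py, missing.py and test_datetime64.py below), so these modules no longer need to go through the pandas.core.ops package for the helpers. The module itself is just the reversed-operator functions, e.g.:

    from pandas.core import roperator

    roperator.radd(2, 10)      # 10 + 2 -> 12
    roperator.rsub(2, 10)      # 10 - 2 -> 8
    roperator.rtruediv(2, 10)  # 10 / 2 -> 5.0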

pandas/core/groupby/generic.py (+1 -1)

@@ -503,7 +503,7 @@ def _cython_transform(

     def _transform_general(self, func: Callable, *args, **kwargs) -> Series:
         """
-        Transform with a callable func`.
+        Transform with a callable `func`.
         """
         assert callable(func)
         klass = type(self.obj)

pandas/core/indexing.py (+1 -1)

@@ -1615,7 +1615,7 @@ def _get_list_axis(self, key, axis: AxisInt):
         try:
             return self.obj._take_with_is_copy(key, axis=axis)
         except IndexError as err:
-            # re-raise with different error message
+            # re-raise with different error message, e.g. test_getitem_ndarray_3d
            raise IndexError("positional indexers are out-of-bounds") from err

     def _getitem_axis(self, key, axis: AxisInt):

pandas/core/ops/__init__.py (+1 -1)

@@ -263,7 +263,7 @@ def to_series(right):
         # We need to pass dtype=right.dtype to retain object dtype
         # otherwise we lose consistency with Index and array ops
         dtype = None
-        if getattr(right, "dtype", None) == object:
+        if right.dtype == object:
             # can't pass right.dtype unconditionally as that would break on e.g.
             # datetime64[h] ndarray
             dtype = object
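
Dropping the getattr guard reflects that right is expected to always carry a dtype on this path (the "more accurate" part of the commit message); the check itself is plain ndarray dtype equality, for example:

    import numpy as np

    np.array(["a", None]).dtype == object   # True  -> dtype=object is passed through
    np.array([1.5, 2.0]).dtype == object    # False -> dtype stays None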

pandas/core/ops/array_ops.py (+2 -4)

@@ -47,12 +47,10 @@
     notna,
 )

+from pandas.core import roperator
 from pandas.core.computation import expressions
 from pandas.core.construction import ensure_wrapped_if_datetimelike
-from pandas.core.ops import (
-    missing,
-    roperator,
-)
+from pandas.core.ops import missing
 from pandas.core.ops.dispatch import should_extension_dispatch
 from pandas.core.ops.invalid import invalid_comparison

pandas/core/ops/methods.py (+1 -1)

@@ -10,7 +10,7 @@
     ABCSeries,
 )

-from pandas.core.ops import roperator
+from pandas.core import roperator


 def _get_method_wrappers(cls):

pandas/core/ops/missing.py (+1 -1)

@@ -33,7 +33,7 @@
     is_scalar,
 )

-from pandas.core.ops import roperator
+from pandas.core import roperator


 def _fill_zeros(result, x, y):

pandas/io/json/_table_schema.py (-2)

@@ -87,8 +87,6 @@ def as_json_table_type(x: DtypeObj) -> str:
         return "datetime"
     elif is_timedelta64_dtype(x):
         return "duration"
-    elif is_categorical_dtype(x):
-        return "any"
     elif is_extension_array_dtype(x):
         return "any"
     elif is_string_dtype(x):
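
The removed branch was redundant: CategoricalDtype is an extension dtype, so it already falls through to the is_extension_array_dtype branch and still maps to "any". A quick check of that premise:

    import pandas as pd
    from pandas.api.types import is_extension_array_dtype

    assert is_extension_array_dtype(pd.CategoricalDtype(["a", "b"]))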

pandas/tests/arithmetic/test_datetime64.py (+10 -10)

@@ -34,7 +34,7 @@
     date_range,
 )
 import pandas._testing as tm
-from pandas.core.ops import roperator
+from pandas.core import roperator
 from pandas.tests.arithmetic.common import (
     assert_cannot_add,
     assert_invalid_addsub_type,
@@ -1550,9 +1550,8 @@ def test_dt64arr_add_sub_DateOffset(self, box_with_array):
         ],
     )
     @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub])
-    @pytest.mark.parametrize("box_other", [True, False])
     def test_dt64arr_add_sub_offset_array(
-        self, tz_naive_fixture, box_with_array, box_other, op, other
+        self, tz_naive_fixture, box_with_array, op, other
     ):
         # GH#18849
         # GH#10699 array of offsets
@@ -1561,19 +1560,20 @@ def test_dt64arr_add_sub_offset_array(
         dti = date_range("2017-01-01", periods=2, tz=tz)
         dtarr = tm.box_expected(dti, box_with_array)

-        other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
         expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))])
         expected = tm.box_expected(expected, box_with_array).astype(object)

-        if box_other:
-            other = tm.box_expected(other, box_with_array)
-            if box_with_array is pd.array and op is roperator.radd:
-                # We expect a PandasArray, not ndarray[object] here
-                expected = pd.array(expected, dtype=object)
-
         with tm.assert_produces_warning(PerformanceWarning):
             res = op(dtarr, other)
+        tm.assert_equal(res, expected)

+        # Same thing but boxing other
+        other = tm.box_expected(other, box_with_array)
+        if box_with_array is pd.array and op is roperator.radd:
+            # We expect a PandasArray, not ndarray[object] here
+            expected = pd.array(expected, dtype=object)
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = op(dtarr, other)
         tm.assert_equal(res, expected)

     @pytest.mark.parametrize(

pandas/tests/arrays/sparse/test_libsparse.py (+2)

@@ -212,6 +212,8 @@ def test_intersect_empty(self):
     @pytest.mark.parametrize(
         "case",
         [
+            # Argument 2 to "IntIndex" has incompatible type "ndarray[Any,
+            # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]"
             IntIndex(5, np.array([1, 2], dtype=np.int32)),  # type: ignore[arg-type]
             IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),  # type: ignore[arg-type]
             IntIndex(0, np.array([], dtype=np.int32)),  # type: ignore[arg-type]

pandas/tests/extension/test_arrow.py (+15 -5)

@@ -389,7 +389,15 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
         if all_numeric_accumulations != "cumsum" or pa_version_under9p0:
             # xfailing takes a long time to run because pytest
             # renders the exception messages even when not showing them
-            pytest.skip(f"{all_numeric_accumulations} not implemented for pyarrow < 9")
+            opt = request.config.option
+            if opt.markexpr and "not slow" in opt.markexpr:
+                pytest.skip(
+                    f"{all_numeric_accumulations} not implemented for pyarrow < 9"
+                )
+            mark = pytest.mark.xfail(
+                reason=f"{all_numeric_accumulations} not implemented for pyarrow < 9"
+            )
+            request.node.add_marker(mark)

         elif all_numeric_accumulations == "cumsum" and (
             pa.types.is_boolean(pa_type) or pa.types.is_decimal(pa_type)
@@ -1409,14 +1417,16 @@ def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
     with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
         ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")

-    # but as of GH#50689, timestamptz is supported
+    with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
+        ArrowDtype.construct_from_string("decimal(7, 2)[pyarrow]")
+
+
+def test_arrowdtype_construct_from_string_supports_dt64tz():
+    # as of GH#50689, timestamptz is supported
     dtype = ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")
     expected = ArrowDtype(pa.timestamp("s", "UTC"))
     assert dtype == expected

-    with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
-        ArrowDtype.construct_from_string("decimal(7, 2)[pyarrow]")
-

 def test_arrowdtype_construct_from_string_type_only_one_pyarrow():
     # GH#51225
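
The first hunk replaces an unconditional skip with a split: skip when running under -m "not slow" (because rendering xfail exception messages is slow), otherwise record a proper xfail. A stripped-down sketch of that pattern in a generic test; the capability check here is a placeholder, not the real pyarrow condition:

    import pytest

    def test_not_yet_implemented(request):
        feature_missing = True  # placeholder for the real capability check
        if feature_missing:
            opt = request.config.option
            if opt.markexpr and "not slow" in opt.markexpr:
                pytest.skip("not implemented; skip to avoid slow xfail rendering")
            request.node.add_marker(pytest.mark.xfail(reason="not implemented"))
        raise NotImplementedError  # absorbed by the xfail marker added above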

pandas/tests/frame/constructors/test_from_records.py (+2 -2)

@@ -72,7 +72,7 @@ def test_from_records_sequencelike(self):

         # this is actually tricky to create the recordlike arrays and
         # have the dtypes be intact
-        blocks = df._to_dict_of_blocks()
+        blocks = df._to_dict_of_blocks(copy=False)
         tuples = []
         columns = []
         dtypes = []
@@ -153,7 +153,7 @@ def test_from_records_dictlike(self):

         # columns is in a different order here than the actual items iterated
         # from the dict
-        blocks = df._to_dict_of_blocks()
+        blocks = df._to_dict_of_blocks(copy=False)
         columns = []
         for b in blocks.values():
             columns.extend(b.columns)

pandas/tests/indexes/datetimes/test_setops.py (+7)

@@ -595,3 +595,10 @@ def test_intersection_dst_transition(self, tz):
         result = idx1.intersection(idx2)
         expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
         tm.assert_index_equal(result, expected)
+
+        # GH#45863 same problem for union
+        index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
+        index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
+        result = index1.union(index2)
+        expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
+        tm.assert_index_equal(result, expected)
