Skip to content

Commit 9f1adbc

Browse files
committed
Merge remote-tracking branch 'upstream/main' into pandas-asan
2 parents 01070f3 + 6a65c64 commit 9f1adbc

39 files changed

+492
-199
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ repos:
3232
# TODO: remove autofix-only rules when they are checked by ruff
3333
name: ruff-selected-autofixes
3434
alias: ruff-selected-autofixes
35-
args: [--select, "ANN001,ANN204", --fix-only, --exit-non-zero-on-fix]
35+
args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix]
3636
- repo: https://github.com/jendrikseipp/vulture
3737
rev: 'v2.10'
3838
hooks:

doc/make.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def _process_single_doc(self, single_doc):
102102
)
103103

104104
@staticmethod
105-
def _run_os(*args):
105+
def _run_os(*args) -> None:
106106
"""
107107
Execute a command in an OS terminal.
108108
@@ -149,7 +149,7 @@ def _sphinx_build(self, kind: str):
149149
]
150150
return subprocess.call(cmd)
151151

152-
def _open_browser(self, single_doc_html):
152+
def _open_browser(self, single_doc_html) -> None:
153153
"""
154154
Open a browser tab showing the single doc page (``single_doc_html``).
155155
"""
@@ -183,7 +183,7 @@ def _get_page_title(self, page):
183183

184184
return title.astext()
185185

186-
def _add_redirects(self):
186+
def _add_redirects(self) -> None:
187187
"""
188188
Create in the build directory an html file with a redirect,
189189
for every row in REDIRECTS_FILE.
@@ -272,14 +272,14 @@ def latex_forced(self):
272272
return self.latex(force=True)
273273

274274
@staticmethod
275-
def clean():
275+
def clean() -> None:
276276
"""
277277
Clean documentation generated files.
278278
"""
279279
shutil.rmtree(BUILD_PATH, ignore_errors=True)
280280
shutil.rmtree(os.path.join(SOURCE_PATH, "reference", "api"), ignore_errors=True)
281281

282-
def zip_html(self):
282+
def zip_html(self) -> None:
283283
"""
284284
Compress HTML documentation into a zip file.
285285
"""

doc/scripts/eval_performance.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
6464
return ev, qu
6565

6666

67-
def plot_perf(df, engines, title, filename=None):
67+
def plot_perf(df, engines, title, filename=None) -> None:
6868
from matplotlib.pyplot import figure
6969

7070
sns.set()

doc/source/conf.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ class AccessorDocumenter(MethodDocumenter):
502502
# lower than MethodDocumenter so this is not chosen for normal methods
503503
priority = 0.6
504504

505-
def format_signature(self):
505+
def format_signature(self) -> str:
506506
# this method gives an error/warning for the accessors, therefore
507507
# overriding it (accessor has no arguments)
508508
return ""
@@ -632,7 +632,7 @@ def get_items(self, names):
632632

633633

634634
# based on numpy doc/source/conf.py
635-
def linkcode_resolve(domain, info):
635+
def linkcode_resolve(domain, info) -> str | None:
636636
"""
637637
Determine the URL corresponding to Python object
638638
"""
@@ -694,12 +694,12 @@ def linkcode_resolve(domain, info):
694694

695695
# remove the docstring of the flags attribute (inherited from numpy ndarray)
696696
# because these give doc build errors (see GH issue 5331)
697-
def remove_flags_docstring(app, what, name, obj, options, lines):
697+
def remove_flags_docstring(app, what, name, obj, options, lines) -> None:
698698
if what == "attribute" and name.endswith(".flags"):
699699
del lines[:]
700700

701701

702-
def process_class_docstrings(app, what, name, obj, options, lines):
702+
def process_class_docstrings(app, what, name, obj, options, lines) -> None:
703703
"""
704704
For those classes for which we use ::
705705
@@ -751,7 +751,7 @@ def process_class_docstrings(app, what, name, obj, options, lines):
751751
]
752752

753753

754-
def process_business_alias_docstrings(app, what, name, obj, options, lines):
754+
def process_business_alias_docstrings(app, what, name, obj, options, lines) -> None:
755755
"""
756756
Starting with sphinx 3.4, the "autodoc-process-docstring" event also
757757
gets called for alias classes. This results in numpydoc adding the
@@ -774,7 +774,7 @@ def process_business_alias_docstrings(app, what, name, obj, options, lines):
774774
suppress_warnings.append("ref.ref")
775775

776776

777-
def rstjinja(app, docname, source):
777+
def rstjinja(app, docname, source) -> None:
778778
"""
779779
Render our pages as a jinja template for fancy templating goodness.
780780
"""
@@ -787,7 +787,7 @@ def rstjinja(app, docname, source):
787787
source[0] = rendered
788788

789789

790-
def setup(app):
790+
def setup(app) -> None:
791791
app.connect("source-read", rstjinja)
792792
app.connect("autodoc-process-docstring", remove_flags_docstring)
793793
app.connect("autodoc-process-docstring", process_class_docstrings)

doc/source/user_guide/io.rst

+8
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,7 @@ order) and the new column names will be the concatenation of the component
836836
column names:
837837

838838
.. ipython:: python
839+
:okwarning:
839840
840841
data = (
841842
"KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
@@ -856,6 +857,7 @@ By default the parser removes the component date columns, but you can choose
856857
to retain them via the ``keep_date_col`` keyword:
857858

858859
.. ipython:: python
860+
:okwarning:
859861
860862
df = pd.read_csv(
861863
"tmp.csv", header=None, parse_dates=[[1, 2], [1, 3]], keep_date_col=True
@@ -871,6 +873,7 @@ single column.
871873
You can also use a dict to specify custom name columns:
872874

873875
.. ipython:: python
876+
:okwarning:
874877
875878
date_spec = {"nominal": [1, 2], "actual": [1, 3]}
876879
df = pd.read_csv("tmp.csv", header=None, parse_dates=date_spec)
@@ -883,6 +886,7 @@ data columns:
883886

884887

885888
.. ipython:: python
889+
:okwarning:
886890
887891
date_spec = {"nominal": [1, 2], "actual": [1, 3]}
888892
df = pd.read_csv(
@@ -902,6 +906,10 @@ data columns:
902906
for your data to store datetimes in this format, load times will be
903907
significantly faster, ~20x has been observed.
904908

909+
.. deprecated:: 2.2.0
910+
Combining date columns inside ``read_csv`` is deprecated. Use ``pd.to_datetime``
911+
on the relevant result columns instead.
912+
905913

906914
Date parsing functions
907915
++++++++++++++++++++++

doc/source/user_guide/timeseries.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -1770,7 +1770,8 @@ We can instead only resample those groups where we have points as follows:
17701770
def round(t, freq):
17711771
# round a Timestamp to a specified freq
17721772
freq = to_offset(freq)
1773-
return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value)
1773+
td = pd.Timedelta(freq)
1774+
return pd.Timestamp((t.value // td.value) * td.value)
17741775
17751776
ts.groupby(partial(round, freq="3min")).sum()
17761777

doc/source/whatsnew/v2.2.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ Set the following option to opt into the future behavior:
438438
Other Deprecations
439439
^^^^^^^^^^^^^^^^^^
440440
- Changed :meth:`Timedelta.resolution_string` to return ``h``, ``min``, ``s``, ``ms``, ``us``, and ``ns`` instead of ``H``, ``T``, ``S``, ``L``, ``U``, and ``N``, for compatibility with respective deprecations in frequency aliases (:issue:`52536`)
441+
- Deprecated :attr:`offsets.Day.delta`, :attr:`offsets.Hour.delta`, :attr:`offsets.Minute.delta`, :attr:`offsets.Second.delta`, :attr:`offsets.Milli.delta`, :attr:`offsets.Micro.delta`, :attr:`offsets.Nano.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)
441442
- Deprecated :func:`pandas.api.types.is_interval` and :func:`pandas.api.types.is_period`, use ``isinstance(obj, pd.Interval)`` and ``isinstance(obj, pd.Period)`` instead (:issue:`55264`)
442443
- Deprecated :func:`pd.core.internals.api.make_block`, use public APIs instead (:issue:`40226`)
443444
- Deprecated :func:`read_gbq` and :meth:`DataFrame.to_gbq`. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`)
@@ -480,13 +481,15 @@ Other Deprecations
480481
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
481482
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
482483
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
484+
- Deprecated support for combining parsed datetime columns in :func:`read_csv` along with the ``keep_date_col`` keyword (:issue:`55569`)
483485
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
484486
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
485487
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
486488
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
487489
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)
488490
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
489491
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
492+
- Deprecated the ``verbose`` keyword in :func:`read_csv` and :func:`read_table` (:issue:`55569`)
490493
- Deprecated the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype`; in a future version replace will change the values while preserving the categories. To change the categories, use ``ser.cat.rename_categories`` instead (:issue:`55147`)
491494
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
492495
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`)
@@ -615,6 +618,7 @@ Indexing
615618
- Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`)
616619
- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
617620
- Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
621+
- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)
618622

619623
Missing
620624
^^^^^^^

generate_pxi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from Cython import Tempita
55

66

7-
def process_tempita(pxifile, outfile):
7+
def process_tempita(pxifile, outfile) -> None:
88
with open(pxifile, encoding="utf-8") as f:
99
tmpl = f.read()
1010
pyxcontent = Tempita.sub(tmpl)
@@ -13,7 +13,7 @@ def process_tempita(pxifile, outfile):
1313
f.write(pyxcontent)
1414

1515

16-
def main():
16+
def main() -> None:
1717
parser = argparse.ArgumentParser()
1818
parser.add_argument("infile", type=str, help="Path to the input file")
1919
parser.add_argument("-o", "--outdir", type=str, help="Path to the output directory")

generate_version.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
sys.path.insert(0, "")
1111

1212

13-
def write_version_info(path):
13+
def write_version_info(path) -> None:
1414
version = None
1515
git_version = None
1616

@@ -29,7 +29,7 @@ def write_version_info(path):
2929
file.write(f'__git_version__="{git_version}"\n')
3030

3131

32-
def main():
32+
def main() -> None:
3333
parser = argparse.ArgumentParser()
3434
parser.add_argument(
3535
"-o",

pandas/_libs/tslibs/offsets.pyx

+22-11
Original file line numberDiff line numberDiff line change
@@ -913,8 +913,19 @@ cdef class Tick(SingleConstructorOffset):
913913
# Since cdef classes have no __dict__, we need to override
914914
return ""
915915

916+
@cache_readonly
917+
def _as_pd_timedelta(self):
918+
return Timedelta(self)
919+
916920
@property
917921
def delta(self):
922+
warnings.warn(
923+
# GH#55498
924+
f"{type(self).__name__}.delta is deprecated and will be removed in "
925+
"a future version. Use pd.Timedelta(obj) instead",
926+
FutureWarning,
927+
stacklevel=find_stack_level(),
928+
)
918929
try:
919930
return self.n * Timedelta(self._nanos_inc)
920931
except OverflowError as err:
@@ -962,22 +973,22 @@ cdef class Tick(SingleConstructorOffset):
962973
except ValueError:
963974
# e.g. "infer"
964975
return False
965-
return self.delta == other
976+
return self._as_pd_timedelta == other
966977

967978
def __ne__(self, other):
968979
return not (self == other)
969980

970981
def __le__(self, other):
971-
return self.delta.__le__(other)
982+
return self._as_pd_timedelta.__le__(other)
972983

973984
def __lt__(self, other):
974-
return self.delta.__lt__(other)
985+
return self._as_pd_timedelta.__lt__(other)
975986

976987
def __ge__(self, other):
977-
return self.delta.__ge__(other)
988+
return self._as_pd_timedelta.__ge__(other)
978989

979990
def __gt__(self, other):
980-
return self.delta.__gt__(other)
991+
return self._as_pd_timedelta.__gt__(other)
981992

982993
def __mul__(self, other):
983994
if is_float_object(other):
@@ -997,21 +1008,21 @@ cdef class Tick(SingleConstructorOffset):
9971008
def __truediv__(self, other):
9981009
if not isinstance(self, Tick):
9991010
# cython semantics mean the args are sometimes swapped
1000-
result = other.delta.__rtruediv__(self)
1011+
result = other._as_pd_timedelta.__rtruediv__(self)
10011012
else:
1002-
result = self.delta.__truediv__(other)
1013+
result = self._as_pd_timedelta.__truediv__(other)
10031014
return _wrap_timedelta_result(result)
10041015

10051016
def __rtruediv__(self, other):
1006-
result = self.delta.__rtruediv__(other)
1017+
result = self._as_pd_timedelta.__rtruediv__(other)
10071018
return _wrap_timedelta_result(result)
10081019

10091020
def __add__(self, other):
10101021
if isinstance(other, Tick):
10111022
if type(self) is type(other):
10121023
return type(self)(self.n + other.n)
10131024
else:
1014-
return delta_to_tick(self.delta + other.delta)
1025+
return delta_to_tick(self._as_pd_timedelta + other._as_pd_timedelta)
10151026
try:
10161027
return self._apply(other)
10171028
except ApplyTypeError:
@@ -1029,15 +1040,15 @@ cdef class Tick(SingleConstructorOffset):
10291040
# Timestamp can handle tz and nano sec, thus no need to use apply_wraps
10301041
if isinstance(other, _Timestamp):
10311042
# GH#15126
1032-
return other + self.delta
1043+
return other + self._as_pd_timedelta
10331044
elif other is NaT:
10341045
return NaT
10351046
elif cnp.is_datetime64_object(other) or PyDate_Check(other):
10361047
# PyDate_Check includes date, datetime
10371048
return Timestamp(other) + self
10381049

10391050
if cnp.is_timedelta64_object(other) or PyDelta_Check(other):
1040-
return other + self.delta
1051+
return other + self._as_pd_timedelta
10411052

10421053
raise ApplyTypeError(f"Unhandled type: {type(other).__name__}")
10431054

pandas/conftest.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -188,19 +188,15 @@ def pytest_collection_modifyitems(items, config) -> None:
188188
("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
189189
]
190190

191-
for item in items:
192-
if is_doctest:
191+
if is_doctest:
192+
for item in items:
193193
# autouse=True for the add_doctest_imports can lead to expensive teardowns
194194
# since doctest_namespace is a session fixture
195195
item.add_marker(pytest.mark.usefixtures("add_doctest_imports"))
196196

197197
for path, message in ignored_doctest_warnings:
198198
ignore_doctest_warning(item, path, message)
199199

200-
# mark all tests in the pandas/tests/frame directory with "arraymanager"
201-
if "/frame/" in item.nodeid:
202-
item.add_marker(pytest.mark.arraymanager)
203-
204200

205201
hypothesis_health_checks = [hypothesis.HealthCheck.too_slow]
206202
if Version(hypothesis.__version__) >= Version("6.83.2"):

pandas/core/arrays/arrow/array.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -693,22 +693,31 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
693693
other = self._box_pa(other)
694694

695695
if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type):
696-
if op in [operator.add, roperator.radd, operator.mul, roperator.rmul]:
696+
if op in [operator.add, roperator.radd]:
697697
sep = pa.scalar("", type=pa_type)
698698
if op is operator.add:
699699
result = pc.binary_join_element_wise(self._pa_array, other, sep)
700700
elif op is roperator.radd:
701701
result = pc.binary_join_element_wise(other, self._pa_array, sep)
702-
else:
703-
if not (
704-
isinstance(other, pa.Scalar) and pa.types.is_integer(other.type)
705-
):
706-
raise TypeError("Can only string multiply by an integer.")
707-
result = pc.binary_join_element_wise(
708-
*([self._pa_array] * other.as_py()), sep
709-
)
710702
return type(self)(result)
711-
703+
elif op in [operator.mul, roperator.rmul]:
704+
binary = self._pa_array
705+
integral = other
706+
if not pa.types.is_integer(integral.type):
707+
raise TypeError("Can only string multiply by an integer.")
708+
pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
709+
result = pc.binary_repeat(binary, pa_integral)
710+
return type(self)(result)
711+
elif (
712+
pa.types.is_string(other.type) or pa.types.is_binary(other.type)
713+
) and op in [operator.mul, roperator.rmul]:
714+
binary = other
715+
integral = self._pa_array
716+
if not pa.types.is_integer(integral.type):
717+
raise TypeError("Can only string multiply by an integer.")
718+
pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
719+
result = pc.binary_repeat(binary, pa_integral)
720+
return type(self)(result)
712721
if (
713722
isinstance(other, pa.Scalar)
714723
and pc.is_null(other).as_py()

0 commit comments

Comments
 (0)