
Commit dfb6b9b

Merge remote-tracking branch 'upstream/main' into tst/cln/unneeded_pa_checking
2 parents: bea39d9 + bf29272

27 files changed: +290 additions, -90 deletions

.github/workflows/macos-windows.yml

Lines changed: 0 additions & 2 deletions

@@ -18,8 +18,6 @@ env:
   PANDAS_CI: 1
   PYTEST_TARGET: pandas
   PATTERN: "not slow and not db and not network and not single_cpu"
-  ERROR_ON_WARNINGS: "1"
-

 permissions:
   contents: read

.github/workflows/ubuntu.yml

Lines changed: 0 additions & 5 deletions

@@ -40,7 +40,6 @@ jobs:
           - name: "Minimum Versions"
             env_file: actions-38-minimum_versions.yaml
             pattern: "not slow and not network and not single_cpu"
-            error_on_warnings: "0"
           - name: "Locale: it_IT"
             env_file: actions-38.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -65,12 +64,10 @@ jobs:
             env_file: actions-310.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_copy_on_write: "1"
-            error_on_warnings: "0"
           - name: "Data Manager"
             env_file: actions-38.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_data_manager: "array"
-            error_on_warnings: "0"
           - name: "Pypy"
             env_file: actions-pypy-38.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -79,7 +76,6 @@ jobs:
             env_file: actions-310-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
             test_args: "-W error::DeprecationWarning -W error::FutureWarning"
-            error_on_warnings: "0"
         exclude:
           - env_file: actions-38.yaml
             pyarrow_version: "8"
@@ -99,7 +95,6 @@ jobs:
       ENV_FILE: ci/deps/${{ matrix.env_file }}
       PATTERN: ${{ matrix.pattern }}
       EXTRA_APT: ${{ matrix.extra_apt || '' }}
-      ERROR_ON_WARNINGS: ${{ matrix.error_on_warnings || '1' }}
      LANG: ${{ matrix.lang || '' }}
      LC_ALL: ${{ matrix.lc_all || '' }}
      PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}

asv_bench/benchmarks/frame_methods.py

Lines changed: 15 additions & 0 deletions

@@ -17,6 +17,21 @@
 from .pandas_vb_common import tm


+class Clip:
+    params = [
+        ["float64", "Float64", "float64[pyarrow]"],
+    ]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        data = np.random.randn(100_000, 10)
+        df = DataFrame(data, dtype=dtype)
+        self.df = df
+
+    def time_clip(self, dtype):
+        self.df.clip(-1.0, 1.0)
+
+
 class GetNumericData:
     def setup(self):
         self.df = DataFrame(np.random.randn(10000, 25))

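The new benchmark times DataFrame.clip over numpy, masked, and pyarrow-backed float columns. For a quick check outside of asv, roughly the same measurement can be reproduced with the standard library's timeit (a sketch only; the "float64[pyarrow]" variant requires pyarrow to be installed):

    import timeit

    import numpy as np
    import pandas as pd

    data = np.random.randn(100_000, 10)
    for dtype in ["float64", "Float64", "float64[pyarrow]"]:
        df = pd.DataFrame(data, dtype=dtype)
        # Time the same operation the Clip benchmark exercises.
        t = timeit.timeit(lambda: df.clip(-1.0, 1.0), number=20)
        print(f"{dtype:>20}: {t / 20 * 1000:.2f} ms per call")
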
ci/run_tests.sh

Lines changed: 0 additions & 7 deletions

@@ -30,13 +30,6 @@ if [[ "$PATTERN" ]]; then
   PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
 fi

-if [[ "$ERROR_ON_WARNINGS" == "1" ]]; then
-  for pth in $(find pandas -name '*.py' -not -path "pandas/tests/*" | sed -e 's/\.py//g' -e 's/\/__init__//g' -e 's/\//./g');
-  do
-    PYTEST_CMD="$PYTEST_CMD -W error:::$pth"
-  done
-fi
-
 echo $PYTEST_CMD
 sh -c "$PYTEST_CMD"

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions

@@ -261,6 +261,8 @@ Copy-on-Write improvements
 - :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism
   when ``inplace=True``.

+- :meth:`DataFrame.transpose` will now respect the Copy-on-Write mechanism.
+
 - Arithmetic operations that can be inplace, e.g. ``ser *= 2`` will now respect the
   Copy-on-Write mechanism.

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ Deprecations

 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
--
+- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`)
 -

 .. ---------------------------------------------------------------------------

pandas/_testing/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -215,6 +215,7 @@
 FLOAT_PYARROW_DTYPES_STR_REPR = [
     str(ArrowDtype(typ)) for typ in FLOAT_PYARROW_DTYPES
 ]
+DECIMAL_PYARROW_DTYPES = [pa.decimal128(7, 3)]
 STRING_PYARROW_DTYPES = [pa.string()]
 BINARY_PYARROW_DTYPES = [pa.binary()]

@@ -239,6 +240,7 @@
 ALL_PYARROW_DTYPES = (
     ALL_INT_PYARROW_DTYPES
     + FLOAT_PYARROW_DTYPES
+    + DECIMAL_PYARROW_DTYPES
     + STRING_PYARROW_DTYPES
     + BINARY_PYARROW_DTYPES
     + TIME_PYARROW_DTYPES

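DECIMAL_PYARROW_DTYPES adds a single fixed-precision type to the dtypes the test helpers cycle through. For reference, the two parameters of pa.decimal128 are the precision (total digits) and the scale (digits after the decimal point):

    import pyarrow as pa

    t = pa.decimal128(7, 3)      # up to 7 significant digits, 3 of them fractional
    print(t.precision, t.scale)  # 7 3
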
pandas/_testing/_warnings.py

Lines changed: 12 additions & 0 deletions

@@ -5,6 +5,7 @@
     nullcontext,
 )
 import re
+import sys
 from typing import (
     Generator,
     Literal,
@@ -163,6 +164,17 @@ def _assert_caught_no_extra_warnings(

     for actual_warning in caught_warnings:
         if _is_unexpected_warning(actual_warning, expected_warning):
+            # GH#38630 pytest.filterwarnings does not suppress these.
+            if actual_warning.category == ResourceWarning:
+                # GH 44732: Don't make the CI flaky by filtering SSL-related
+                # ResourceWarning from dependencies
+                if "unclosed <ssl.SSLSocket" in str(actual_warning.message):
+                    continue
+                # GH 44844: Matplotlib leaves font files open during the entire process
+                # upon import. Don't make CI flaky if ResourceWarning raised
+                # due to these open files.
+                if any("matplotlib" in mod for mod in sys.modules):
+                    continue
             extra_warnings.append(
                 (
                     actual_warning.category.__name__,

pandas/core/arrays/arrow/array.py

Lines changed: 1 addition & 0 deletions

@@ -1091,6 +1091,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
             pa.types.is_integer(pa_type)
             or pa.types.is_floating(pa_type)
             or pa.types.is_duration(pa_type)
+            or pa.types.is_decimal(pa_type)
         ):
             # pyarrow only supports any/all for boolean dtype, we allow
             # for other dtypes, matching our non-pyarrow behavior

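With is_decimal added to this check, any/all on decimal-backed Arrow arrays should take the same fallback as integer, float, and duration dtypes, where non-zero values count as truthy. A rough illustration, assuming pandas 2.x with pyarrow installed:

    from decimal import Decimal

    import pyarrow as pa
    import pandas as pd

    ser = pd.Series(
        [Decimal("1.5"), Decimal("0"), None],
        dtype=pd.ArrowDtype(pa.decimal128(7, 3)),
    )
    print(ser.any())  # True: 1.5 is treated as truthy
    print(ser.all())  # False: 0 is falsy; missing values are skipped by default
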
pandas/core/arrays/arrow/dtype.py

Lines changed: 1 addition & 1 deletion

@@ -201,7 +201,7 @@ def construct_from_string(cls, string: str) -> ArrowDtype:
         try:
             pa_dtype = pa.type_for_alias(base_type)
         except ValueError as err:
-            has_parameters = re.search(r"\[.*\]", base_type)
+            has_parameters = re.search(r"[\[\(].*[\]\)]", base_type)
             if has_parameters:
                 # Fallback to try common temporal types
                 try:

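The old pattern only recognised square-bracket parameters such as "timestamp[ns]", so parenthesised types fell straight through to the plain error path. The widened pattern also treats parentheses as parameters, which covers decimal types among others (the base_type value below is just an illustrative input to the regex):

    import re

    base_type = "decimal128(7, 3)"

    print(bool(re.search(r"\[.*\]", base_type)))          # False: old pattern misses it
    print(bool(re.search(r"[\[\(].*[\]\)]", base_type)))  # True: parentheses now count as parameters
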
pandas/core/frame.py

Lines changed: 6 additions & 2 deletions

@@ -3545,10 +3545,14 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.
             new_vals = self._values.T
-            if copy:
+            if copy and not using_copy_on_write():
                 new_vals = new_vals.copy()

-            result = self._constructor(new_vals, index=self.columns, columns=self.index)
+            result = self._constructor(
+                new_vals, index=self.columns, columns=self.index, copy=False
+            )
+            if using_copy_on_write() and len(self) > 0:
+                result._mgr.add_references(self._mgr)  # type: ignore[arg-type]

         elif (
             self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0])

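Under copy-on-write the fast transpose path now skips the eager copy and instead records references to the parent's blocks via add_references. The user-visible effect, sketched with public API calls only and assuming the copy_on_write option is enabled:

    import pandas as pd

    pd.set_option("mode.copy_on_write", True)

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df_orig = df.copy()

    # No data is copied up front; the transposed frame references df's values.
    result = df.transpose(copy=False)

    # Writing to the result triggers the deferred copy, so df is left untouched.
    result.iloc[0, 0] = 100
    pd.testing.assert_frame_equal(df, df_orig)
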
pandas/core/generic.py

Lines changed: 24 additions & 15 deletions

@@ -4015,10 +4015,10 @@ class animal locomotion

         Get values at several indexes

-        >>> df.xs(('mammal', 'dog'))
-                   num_legs  num_wings
-        locomotion
-        walks             4          0
+        >>> df.xs(('mammal', 'dog', 'walks'))
+        num_legs     4
+        num_wings    0
+        Name: (mammal, dog, walks), dtype: int64

         Get values at specified index and level

@@ -7985,24 +7985,33 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
         ):
             raise ValueError("Cannot use an NA value as a clip threshold")

-        result = self
-        mask = isna(self._values)
+        mgr = self._mgr

-        with np.errstate(all="ignore"):
+        if inplace:
+            # cond (for putmask) identifies values to be updated.
+            # exclude boundary as values at the boundary should be no-ops.
             if upper is not None:
-                subset = self <= upper
-                result = result.where(subset, upper, axis=None, inplace=False)
+                cond = self > upper
+                mgr = mgr.putmask(mask=cond, new=upper, align=False)
             if lower is not None:
-                subset = self >= lower
-                result = result.where(subset, lower, axis=None, inplace=False)
-
-        if np.any(mask):
-            result[mask] = np.nan
+                cond = self < lower
+                mgr = mgr.putmask(mask=cond, new=lower, align=False)
+        else:
+            # cond (for where) identifies values to be left as-is.
+            # include boundary as values at the boundary should be no-ops.
+            mask = isna(self)
+            if upper is not None:
+                cond = mask | (self <= upper)
+                mgr = mgr.where(other=upper, cond=cond, align=False)
+            if lower is not None:
+                cond = mask | (self >= lower)
+                mgr = mgr.where(other=lower, cond=cond, align=False)

+        result = self._constructor(mgr)
         if inplace:
             return self._update_inplace(result)
         else:
-            return result
+            return result.__finalize__(self)

     @final
     def _clip_with_one_bound(self, threshold, method, axis, inplace):

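The rewritten _clip_with_scalar routes the work through the block manager: putmask for the inplace path (only values strictly beyond a bound are overwritten) and where for the copying path (NaNs and anything within or on the bounds are kept). The same masking semantics can be illustrated with plain numpy; the names below are illustrative, not pandas internals:

    import numpy as np

    values = np.array([-3.0, -1.0, 0.0, np.nan, 2.0, 5.0])
    lower, upper = -1.0, 1.0

    # Inplace path: putmask-style, update only values strictly outside the bounds.
    out = values.copy()
    np.putmask(out, values > upper, upper)
    np.putmask(out, values < lower, lower)

    # Copying path: where-style, keep NaNs and in-bound values (boundary included).
    mask = np.isnan(values)
    kept = np.where(mask | (values <= upper), values, upper)
    kept = np.where(mask | (kept >= lower), kept, lower)

    print(out)   # [-1. -1.  0. nan  1.  1.]
    print(kept)  # [-1. -1.  0. nan  1.  1.]
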
pandas/core/indexes/base.py

Lines changed: 7 additions & 1 deletion

@@ -810,7 +810,11 @@ def _engine(
         target_values = self._get_engine_target()
         if isinstance(target_values, ExtensionArray):
             if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
-                return _masked_engines[target_values.dtype.name](target_values)
+                try:
+                    return _masked_engines[target_values.dtype.name](target_values)
+                except KeyError:
+                    # Not supported yet e.g. decimal
+                    pass
             elif self._engine_type is libindex.ObjectEngine:
                 return libindex.ExtensionEngine(target_values)

@@ -4948,6 +4952,8 @@ def _get_engine_target(self) -> ArrayLike:
             and not (
                 isinstance(self._values, ArrowExtensionArray)
                 and is_numeric_dtype(self.dtype)
+                # Exclude decimal
+                and self.dtype.kind != "O"
             )
         ):
             # TODO(ExtensionIndex): remove special-case, just use self._values

pandas/core/internals/managers.py

Lines changed: 3 additions & 0 deletions

@@ -253,6 +253,9 @@ def add_references(self, mgr: BaseBlockManager) -> None:
         Adds the references from one manager to another. We assume that both
         managers have the same block structure.
         """
+        if len(self.blocks) != len(mgr.blocks):
+            # If block structure changes, then we made a copy
+            return
         for i, blk in enumerate(self.blocks):
             blk.refs = mgr.blocks[i].refs
             # Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type

pandas/io/sql.py

Lines changed: 0 additions & 27 deletions

@@ -632,36 +632,9 @@ def read_sql(
     >>> pd.read_sql('test_data', 'postgres:///db_name')  # doctest:+SKIP

     Apply date parsing to columns through the ``parse_dates`` argument
-
-    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
-    ...             conn,
-    ...             parse_dates=["date_column"])
-       int_column date_column
-    0           0  2012-10-11
-    1           1  2010-12-11
-
     The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns.
     Custom argument values for applying ``pd.to_datetime`` on a column are specified
     via a dictionary format:
-    1. Ignore errors while parsing the values of "date_column"
-
-    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
-    ...             conn,
-    ...             parse_dates={"date_column": {"errors": "ignore"}})
-       int_column date_column
-    0           0  2012-10-11
-    1           1  2010-12-11
-
-    2. Apply a dayfirst date parsing order on the values of "date_column"
-
-    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
-    ...             conn,
-    ...             parse_dates={"date_column": {"dayfirst": True}})
-       int_column date_column
-    0           0  2012-11-10
-    1           1  2010-11-12
-
-    3. Apply custom formatting when date parsing the values of "date_column"

     >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
     ...             conn,

pandas/tests/copy_view/test_clip.py

Lines changed: 6 additions & 8 deletions

@@ -12,13 +12,13 @@ def test_clip_inplace_reference(using_copy_on_write):
     view = df[:]
     df.clip(lower=2, inplace=True)

-    # Clip not actually inplace right now but could be
-    assert not np.shares_memory(get_array(df, "a"), arr_a)
-
     if using_copy_on_write:
+        assert not np.shares_memory(get_array(df, "a"), arr_a)
         assert df._mgr._has_no_reference(0)
         assert view._mgr._has_no_reference(0)
         tm.assert_frame_equal(df_copy, view)
+    else:
+        assert np.shares_memory(get_array(df, "a"), arr_a)


 def test_clip_inplace_reference_no_op(using_copy_on_write):
@@ -28,22 +28,20 @@ def test_clip_inplace_reference_no_op(using_copy_on_write):
     view = df[:]
     df.clip(lower=0, inplace=True)

+    assert np.shares_memory(get_array(df, "a"), arr_a)
+
     if using_copy_on_write:
-        assert np.shares_memory(get_array(df, "a"), arr_a)
         assert not df._mgr._has_no_reference(0)
         assert not view._mgr._has_no_reference(0)
         tm.assert_frame_equal(df_copy, view)
-    else:
-        assert not np.shares_memory(get_array(df, "a"), arr_a)


 def test_clip_inplace(using_copy_on_write):
     df = DataFrame({"a": [1.5, 2, 3]})
     arr_a = get_array(df, "a")
     df.clip(lower=2, inplace=True)

-    # Clip not actually inplace right now but could be
-    assert not np.shares_memory(get_array(df, "a"), arr_a)
+    assert np.shares_memory(get_array(df, "a"), arr_a)

     if using_copy_on_write:
         assert df._mgr._has_no_reference(0)

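The updated tests assert that clip(..., inplace=True) now reuses the existing buffer instead of allocating a new one. Roughly the same check with public API calls only (a sketch; the tests use the internal get_array helper rather than to_numpy):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.5, 2.0, 3.0]})
    arr_before = df["a"].to_numpy()

    df.clip(lower=2, inplace=True)

    # The inplace clip writes through the existing block, so the buffer is shared.
    print(np.shares_memory(df["a"].to_numpy(), arr_before))  # True
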
pandas/tests/copy_view/test_methods.py

Lines changed: 34 additions & 0 deletions

@@ -1593,3 +1593,37 @@
         tm.assert_series_equal(ser_orig, view)
     else:
         assert np.shares_memory(get_array(ser), get_array(view))
+
+
+@pytest.mark.parametrize("copy", [True, False])
+def test_transpose(using_copy_on_write, copy, using_array_manager):
+    df = DataFrame({"a": [1, 2, 3], "b": 1})
+    df_orig = df.copy()
+    result = df.transpose(copy=copy)
+
+    if not copy and not using_array_manager or using_copy_on_write:
+        assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
+    else:
+        assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
+
+    result.iloc[0, 0] = 100
+    if using_copy_on_write:
+        tm.assert_frame_equal(df, df_orig)
+
+
+def test_transpose_different_dtypes(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1.5})
+    df_orig = df.copy()
+    result = df.T
+
+    assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
+    result.iloc[0, 0] = 100
+    if using_copy_on_write:
+        tm.assert_frame_equal(df, df_orig)
+
+
+def test_transpose_ea_single_column(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
+    result = df.T
+
+    assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
