Skip to content

BUG: _validate_setitem_value fails to raise for PandasArray #55759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import numpy as np

import pandas as pd
import pandas as pd
from pandas import (
DataFrame,
Series,
Expand Down
28 changes: 28 additions & 0 deletions pandas/_config/FirstDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd

# Example 1: parse day-first date strings ('4/03/2022' means 4 March 2022).
df = pd.DataFrame(
    {"date": ["4/03/2022", "5/03/2022", "6/03/2022"], "patients": [16, 19, 11]}
)

# dayfirst=True tells pd.to_datetime to read the leading field as the day.
# NOTE: the target column is called 'year' but receives the full parsed
# dates, not just the year component.
df["year"] = pd.to_datetime(df["date"], dayfirst=True)

print(df)

# Example 2: parse with an explicit day/month/year format, then render the
# dates back to text as 'dd-mm-YYYY'. This rebinds `df`, replacing example 1.
df = pd.DataFrame(
    {"date": ["04/03/2004", "03/03/2005", "03/03/2007"], "patients": [16, 19, 11]}
)

# The 'date' column ends up holding formatted strings, not datetimes.
parsed = pd.to_datetime(df["date"], format="%d/%m/%Y")
df["date"] = parsed.dt.strftime("%d-%m-%Y")

print(df)




11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2810,6 +2810,17 @@ cdef class QuarterBegin(QuarterOffset):
_from_name_starting_month = 1
_prefix = "QS"
_day_opt = "start"

cdef readonly:
int _period_dtype_code

def __init__(self, n=1, normalize=False, StartingMonth=None):
    """
    Initialise the offset and cache its period dtype code.

    The arguments are forwarded unchanged to ``QuarterOffset.__init__``.
    """
    # Define _period_dtype_code once at construction for performance, so it
    # does not have to be recomputed on each use.
    # NOTE(review): the public pandas API spells this option/attribute
    # ``startingMonth``; confirm the capitalised ``StartingMonth`` used here
    # resolves on QuarterOffset before merging.
    QuarterOffset.__init__(self, n, normalize, StartingMonth)
    self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.StartingMonth % 12



# ----------------------------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,12 +1015,13 @@ def wrapper(*args, **kwargs):
# If we made the result 2-D, squeeze it back to 1-D
result = np.squeeze(result)
else:
#55753 Added the comments for this Fix
result = np.apply_along_axis(
wrap_function(self.func),
self.axis,
self.values,
*self.args,
**self.kwargs,
*self.args,#Added args
**self.kwargs,#Added kwargs
)

# TODO: mixed type case
Expand Down
28 changes: 28 additions & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,3 +564,31 @@ def _wrap_ndarray_result(self, result: np.ndarray):
# ------------------------------------------------------------------------
# String methods interface
_str_na_value = np.nan
def _validate_setitem_value(self, value):
    """
    Check if we have a scalar that we can cast losslessly.

    Parameters
    ----------
    value : object
        Candidate scalar to be written into the array.

    Returns
    -------
    object
        ``value`` itself, unchanged, when it is acceptable for
        ``self.dtype``.

    Raises
    ------
    TypeError
        If ``value`` is not accepted by any of the dtype-kind rules below.
    """
    kind = self.dtype.kind
    # TODO: get this all from np_can_hold_element?
    if kind == "b":
        if lib.is_bool(value):
            return value

    elif kind == "f":
        if lib.is_integer(value) or lib.is_float(value):
            return value

    else:
        # NOTE(review): this branch applies the integer rule to every kind
        # other than "b" and "f" -- confirm that is intended for non-integer
        # dtypes (e.g. object or string).
        if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()):
            return value
        # TODO: unsigned checks

    # Every accepted case has returned above, so reaching this point means
    # the value is invalid. Raise unconditionally: comparing
    # lib.infer_dtype(value) (a string label) with a dtype is not a
    # meaningful check, and infer_dtype attempts to iterate a scalar.
    # Note: without the "str" here, the f-string rendering raises in
    # py38 builds.
    raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")

5 changes: 3 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,8 +986,9 @@ def convert_dtypes(
convert_integer: bool = True,
convert_boolean: bool = True,
convert_floating: bool = True,
convert_pyarrow:bool= True,
infer_objects: bool = False,
dtype_backend: Literal["numpy_nullable", "pyarrow"] = "numpy_nullable",
dtype_backend: Literal["numpy_nullable"] = "numpy_nullable",
) -> DtypeObj:
"""
Convert objects to best possible type, and optionally,
Expand All @@ -1009,7 +1010,7 @@ def convert_dtypes(
infer_objects : bool, defaults False
Whether to also infer objects to float/int if possible. Is only hit if the
object array contains pd.NA.
dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
dtype_backend : {'numpy_nullable'}, default 'numpy_nullable'
Back-end data type applied to the resultant :class:`DataFrame`
(still experimental). Behaviour is as follows:

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/util/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def combine_hash_arrays(

def hash_pandas_object(
obj: Index | DataFrame | Series,
index: bool = True,
index: bool = False,
encoding: str = "utf8",
hash_key: str | None = _default_hash_key,
categorize: bool = True,
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1543,7 +1543,7 @@ def get_engine(engine: str) -> BaseEngine:
raise ValueError("engine must be one of 'auto', 'sqlalchemy'")


class SQLDatabase(PandasSQL):
class SQLDatabase(PandasSQL,**connect_kwargs):
"""
This class enables conversion between DataFrame and SQL databases
using SQLAlchemy to handle DataBase abstraction.
Expand Down