From 4556f11c461088233ce60a950cc5aa8caa03db71 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Thu, 19 Oct 2023 17:01:56 +0800 Subject: [PATCH 01/16] Fix for #55542 Fix for #55542 --- pandas/_config/FirstDate.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 pandas/_config/FirstDate.py diff --git a/pandas/_config/FirstDate.py b/pandas/_config/FirstDate.py new file mode 100644 index 0000000000000..33a1209c98988 --- /dev/null +++ b/pandas/_config/FirstDate.py @@ -0,0 +1,28 @@ +import pandas as pd + + +import pandas as pd + +# Create a pandas DataFrame with a date column +df = pd.DataFrame({'date': ['4/03/2022', '5/03/2022', '6/03/2022'], 'patients': [16, 19, 11]}) + +# Convert the date column to a pandas `DatetimeIndex` object with the day first +#df['year'] = pd.to_datetime(df['date'], yearFirst=False) +df['year'] = pd.to_datetime(df['date'], dayfirst=True) + +# Print the DataFrame +print(df) + +# Create a pandas DataFrame with a date column +df = pd.DataFrame({'date': ['04/03/2004', '03/03/2005', '03/03/2007'], 'patients': [16, 19, 11]}) + +# Convert the date column to a pandas `DatetimeIndex` object with the day first +#df['year'] = pd.to_datetime(df['date'], yearFirst=False) +df['date'] = pd.to_datetime(df['date'], format='%d/%m/%Y').dt.strftime('%d-%m-%Y') + +# Print the DataFrame +print(df) + + + + From 3ad9d3fada8bcb73ede3a4ac4326b7aeb4afb62a Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Wed, 25 Oct 2023 15:46:50 +0800 Subject: [PATCH 02/16] issue 55605 If we make Index false then we will get same hash --- pandas/_config/FirstDate.py | 2 +- pandas/core/util/hashing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_config/FirstDate.py b/pandas/_config/FirstDate.py index 33a1209c98988..ffcc6c8aac1f7 100644 --- a/pandas/_config/FirstDate.py +++ b/pandas/_config/FirstDate.py @@ -21,7 +21,7 @@ df['date'] = pd.to_datetime(df['date'], format='%d/%m/%Y').dt.strftime('%d-%m-%Y') # Print the DataFrame -print(df) + print(df) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 4933de3212581..62cf878c4f975 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -82,7 +82,7 @@ def combine_hash_arrays( def hash_pandas_object( obj: Index | DataFrame | Series, - index: bool = True, + index: bool = False, encoding: str = "utf8", hash_key: str | None = _default_hash_key, categorize: bool = True, From 4eac59004b20ea5bb2fdb299334147c34e267be0 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Thu, 26 Oct 2023 15:20:40 +0800 Subject: [PATCH 03/16] ISSUE # 51044 Added _validate_setitem_Value --- pandas/core/arrays/numpy_.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index efe0c0df45e00..89d4f3cb27a84 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -564,3 +564,30 @@ def _wrap_ndarray_result(self, result: np.ndarray): # ------------------------------------------------------------------------ # String methods interface _str_na_value = np.nan + def _validate_setitem_value(self, value): + """ + Check if we have a scalar that we can cast losslessly. + + Raises + ------ + TypeError + """ + kind = self.dtype.kind + # TODO: get this all from np_can_hold_element? + if kind == "b": + if lib.is_bool(value): + return value + + elif kind == "f": + if lib.is_integer(value) or lib.is_float(value): + return value + + else: + if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()): + return value + # TODO: unsigned checks + + # Note: without the "str" here, the f-string rendering raises in + # py38 builds. + raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") + From bcc74365d47603cf3a4a4720330a901f658a12fd Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:44:12 +0800 Subject: [PATCH 04/16] 51044 if isinstance(value, str): raise ValueError("String values are not allowed in an integer array.") --- asv_bench/benchmarks/arithmetic.py | 2 +- pandas/core/arrays/numpy_.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 49543c166d047..1bcefb06d6d48 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -3,7 +3,7 @@ import numpy as np -import pandas as pd +import pandas as pd from pandas import ( DataFrame, Series, diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 89d4f3cb27a84..bb4ba3505674e 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -589,5 +589,6 @@ def _validate_setitem_value(self, value): # Note: without the "str" here, the f-string rendering raises in # py38 builds. - raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") + if isinstance(value, str): + raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") From 9dcc82d524d789b998e51527d8024a930b786959 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:14:32 +0800 Subject: [PATCH 05/16] #validate set item --- pandas/core/arrays/numpy_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index bb4ba3505674e..f34b9939c54ba 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -589,6 +589,6 @@ def _validate_setitem_value(self, value): # Note: without the "str" here, the f-string rendering raises in # py38 builds. - if isinstance(value, str): + if lib.infer_dtype(value)!=self.dtype: raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") From 82cced6a1fb04dc819248a2b6f2df51fbd5b43d2 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:23:44 +0800 Subject: [PATCH 06/16] #55753 Added the comments --- pandas/core/apply.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3b79882d3c762..0b62d45051a65 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1015,6 +1015,7 @@ def wrapper(*args, **kwargs): # If we made the result 2-D, squeeze it back to 1-D result = np.squeeze(result) else: + #55753 Added the comments for this Fix result = np.apply_along_axis( wrap_function(self.func), self.axis, From aebcb6721abbd298a64b4f5d62c24485debcc146 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:25:08 +0800 Subject: [PATCH 07/16] #55753 Added comments in the function parameter --- pandas/core/apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 0b62d45051a65..bacf5ed99c418 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1020,8 +1020,8 @@ def wrapper(*args, **kwargs): wrap_function(self.func), self.axis, self.values, - *self.args, - **self.kwargs, + *self.args,#Added args + **self.kwargs,#Added kwargs ) # TODO: mixed type case From 8c7a04fe289d057e73ef68d415236cad158a8890 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:33:40 +0800 Subject: [PATCH 08/16] #55785 Fix for QuaterBegin --- pandas/_libs/tslibs/offsets.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index c23bccdea3a8a..2097605846e58 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2810,6 +2810,7 @@ cdef class QuarterBegin(QuarterOffset): _from_name_starting_month = 1 _prefix = "QS" _day_opt = "start" + _period_dtype_code = PeriodDtypeCode.Q_DEC # ---------------------------------------------------------------------- From 22112e69ffc1415e222fd3b41857b757e1078ca9 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:33:47 +0800 Subject: [PATCH 09/16] #55785 Fix for QuaterBegin --- pandas/_libs/tslibs/offsets.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2097605846e58..6885cc6de8446 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2810,7 +2810,7 @@ cdef class QuarterBegin(QuarterOffset): _from_name_starting_month = 1 _prefix = "QS" _day_opt = "start" - _period_dtype_code = PeriodDtypeCode.Q_DEC + _period_dtype_code = PeriodDtypeCode.Q_DEC#55785 Fix for QuaterBegin # ---------------------------------------------------------------------- From bf6cedf97317450078760562fd25837acb870212 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 15:50:02 +0800 Subject: [PATCH 10/16] #55785 Add the cython and offsets.pyx --- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/io/sql.py | 2 +- setup.py | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 6885cc6de8446..1dea785d09322 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2811,7 +2811,7 @@ cdef class QuarterBegin(QuarterOffset): _prefix = "QS" _day_opt = "start" _period_dtype_code = PeriodDtypeCode.Q_DEC#55785 Fix for QuaterBegin - + print (_period_dtype_code) # ---------------------------------------------------------------------- # Month-Based Offset Classes diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b4675513a99c2..febea2378f041 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1543,7 +1543,7 @@ def get_engine(engine: str) -> BaseEngine: raise ValueError("engine must be one of 'auto', 'sqlalchemy'") -class SQLDatabase(PandasSQL): +class SQLDatabase(PandasSQL,**connect_kwargs): """ This class enables conversion between DataFrame and SQL databases using SQLAlchemy to handle DataBase abstraction. diff --git a/setup.py b/setup.py index db3717efb738d..d777362c44a59 100755 --- a/setup.py +++ b/setup.py @@ -15,6 +15,12 @@ import sys from sysconfig import get_config_vars +from setuptools import setup + from Cython.Build import cythonize + +setup( + ext_modules = cythonize("C:\Users\harik\Documents\GitHub\pandas\pandas\_libs\tslibs\offsets.pyx") +) import numpy from pkg_resources import parse_version from setuptools import ( From f7df7df32083a7066b6ea2ae87137c4a903faef9 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 15:50:45 +0800 Subject: [PATCH 11/16] #55785 Updated setup.py for offsets.pyx --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d777362c44a59..6f6d38a1f0e33 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ from sysconfig import get_config_vars from setuptools import setup - from Cython.Build import cythonize +from Cython.Build import cythonize setup( ext_modules = cythonize("C:\Users\harik\Documents\GitHub\pandas\pandas\_libs\tslibs\offsets.pyx") From 5a89f96b6201c4676e74f6a3bb9a4e14272dab4a Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:04:21 +0800 Subject: [PATCH 12/16] #55875 --- pandas/_libs/tslibs/offsets.pyx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 1dea785d09322..991193bbf2f07 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2810,8 +2810,15 @@ cdef class QuarterBegin(QuarterOffset): _from_name_starting_month = 1 _prefix = "QS" _day_opt = "start" - _period_dtype_code = PeriodDtypeCode.Q_DEC#55785 Fix for QuaterBegin - print (_period_dtype_code) + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, startingMonth=None): + # Because QuarterEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + QuarterOffset.__init__(self, n, normalize, startingMonth) + self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 + # ---------------------------------------------------------------------- # Month-Based Offset Classes From 8635bc5874cae299537cda9bf97d43ebfdf3be79 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:07:47 +0800 Subject: [PATCH 13/16] #55875 --- setup.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/setup.py b/setup.py index 6f6d38a1f0e33..db3717efb738d 100755 --- a/setup.py +++ b/setup.py @@ -15,12 +15,6 @@ import sys from sysconfig import get_config_vars -from setuptools import setup -from Cython.Build import cythonize - -setup( - ext_modules = cythonize("C:\Users\harik\Documents\GitHub\pandas\pandas\_libs\tslibs\offsets.pyx") -) import numpy from pkg_resources import parse_version from setuptools import ( From fe48ca88221ef09c53238dcf651aad5e30d497a5 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:09:26 +0800 Subject: [PATCH 14/16] #55875 --- pandas/_libs/tslibs/offsets.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 991193bbf2f07..8c6f60a794ba3 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2810,6 +2810,7 @@ cdef class QuarterBegin(QuarterOffset): _from_name_starting_month = 1 _prefix = "QS" _day_opt = "start" + cdef readonly: int _period_dtype_code @@ -2820,6 +2821,7 @@ cdef class QuarterBegin(QuarterOffset): self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 + # ---------------------------------------------------------------------- # Month-Based Offset Classes From 0d32a4b03b4c8227fa35a4ee6297be71824f8b68 Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:09:36 +0800 Subject: [PATCH 15/16] #55875 --- pandas/_libs/tslibs/offsets.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8c6f60a794ba3..e78263f8d7c59 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2811,7 +2811,7 @@ cdef class QuarterBegin(QuarterOffset): _prefix = "QS" _day_opt = "start" - cdef readonly: + cdef readonly: int _period_dtype_code def __init__(self, n=1, normalize=False, startingMonth=None): From 061159d78bb09fcf96e9e129f393c69dee8517bd Mon Sep 17 00:00:00 2001 From: hvsesha <65343617+hvsesha@users.noreply.github.com> Date: Thu, 9 Nov 2023 14:30:46 +0800 Subject: [PATCH 16/16] #55849 change the signature for convert_dtype --- pandas/_libs/tslibs/offsets.pyx | 9 +++++---- pandas/core/dtypes/cast.py | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e78263f8d7c59..6cc7516f98cb4 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2814,11 +2814,12 @@ cdef class QuarterBegin(QuarterOffset): cdef readonly: int _period_dtype_code - def __init__(self, n=1, normalize=False, startingMonth=None): - # Because QuarterEnd can be the freq for a Period, define its + def __init__(self, n=1, normalize=False, StartingMonth=None): + # Because QuarterBegin can be the freq for a Period, define its # _period_dtype_code at construction for performance - QuarterOffset.__init__(self, n, normalize, startingMonth) - self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 + QuarterOffset.__init__(self, n, normalize, StartingMonth) + self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.StartingMonth % 12 + print(self._period_dtype_code) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 716d1a78f93c5..435e23043aac7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -986,8 +986,9 @@ def convert_dtypes( convert_integer: bool = True, convert_boolean: bool = True, convert_floating: bool = True, + convert_pyarrow:bool= True, infer_objects: bool = False, - dtype_backend: Literal["numpy_nullable", "pyarrow"] = "numpy_nullable", + dtype_backend: Literal["numpy_nullable"] = "numpy_nullable", ) -> DtypeObj: """ Convert objects to best possible type, and optionally, @@ -1009,7 +1010,7 @@ def convert_dtypes( infer_objects : bool, defaults False Whether to also infer objects to float/int if possible. Is only hit if the object array contains pd.NA. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable'}, default 'numpy_nullable' Back-end data type applied to the resultant :class:`DataFrame` (still experimental). Behaviour is as follows: