Description
First, to all maintainers, thank you for your hard work! Pandas is a lifesaver.
Code Sample, a copy-pastable example
from datetime import date, datetime
import pandas as pd
df = pd.DataFrame({"date": pd.date_range("2021-01-01", "2021-01-02")})
my_datetime = datetime(2021, 1, 1)
my_date_str = "2021-01-01"
my_date = date(2021, 1, 1)
print(df[df["date"] == my_datetime])
print(df[df["date"] > my_datetime])
print(df[df["date"] == my_date_str])
print(df[df["date"] > my_date_str])
print(df[df["date"] == my_date])
print(df[df["date"] > my_date])
Output
date
0 2021-01-01
date
1 2021-01-02
date
0 2021-01-01
date
1 2021-01-02
Empty DataFrame
Columns: [date]
Index: []
---------------------------------------------------------------------------
InvalidComparison Traceback (most recent call last)
Rest of traceback:
/opt/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py in _cmp_method(self, other, op)
931 try:
--> 932 other = self._validate_comparison_value(other)
933 except InvalidComparison:
/opt/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py in _validate_comparison_value(self, other)
450 elif not is_list_like(other):
--> 451 raise InvalidComparison(other)
452
InvalidComparison: 2021-01-01
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/usr/src/app/notebooks/005-whirlpool/06-get-fratecaster.py in <module>
409 print(df[df["date"] > my_date_str])
410 print(df[df["date"] == my_date])
---> 411 print(df[df["date"] > my_date])
412
/opt/venv/lib/python3.8/site-packages/pandas/core/ops/common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
/opt/venv/lib/python3.8/site-packages/pandas/core/arraylike.py in __gt__(self, other)
43 @unpack_zerodim_and_defer("__gt__")
44 def __gt__(self, other):
---> 45 return self._cmp_method(other, operator.gt)
46
47 @unpack_zerodim_and_defer("__ge__")
/opt/venv/lib/python3.8/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
4976 rvalues = extract_array(other, extract_numpy=True)
4977
-> 4978 res_values = ops.comparison_op(lvalues, rvalues, op)
4979
4980 return self._construct_result(res_values, name=res_name)
/opt/venv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
227 if should_extension_dispatch(lvalues, rvalues):
228 # Call the method on lvalues
--> 229 res_values = op(lvalues, rvalues)
230
231 elif is_scalar(rvalues) and isna(rvalues):
/opt/venv/lib/python3.8/site-packages/pandas/core/ops/common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
/opt/venv/lib/python3.8/site-packages/pandas/core/arraylike.py in __gt__(self, other)
43 @unpack_zerodim_and_defer("__gt__")
44 def __gt__(self, other):
---> 45 return self._cmp_method(other, operator.gt)
46
47 @unpack_zerodim_and_defer("__ge__")
/opt/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py in _cmp_method(self, other, op)
932 other = self._validate_comparison_value(other)
933 except InvalidComparison:
--> 934 return invalid_comparison(self, other, op)
935
936 dtype = getattr(other, "dtype", None)
/opt/venv/lib/python3.8/site-packages/pandas/core/ops/invalid.py in invalid_comparison(left, right, op)
32 else:
33 typ = type(right).__name__
---> 34 raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")
35 return res_values
36
TypeError: Invalid comparison between dtype=datetime64[ns] and date
Problem description
It seems reasonable to expect that if a pandas datetime column can be compared to a date string, it can also be compared to a date object carrying exactly the same information. I see a recent PR which, if I'm reading it right, made it so that a pandas Timestamp won't compare EQUAL to a date object, in order to match stdlib behavior. I'm fine with that; users should expect equality to be really picky. However, if the goal is to match stdlib behavior, we probably shouldn't allow equality between pandas Timestamps and strings either. All that aside, my main point is that if greater-than/less-than comparisons are allowed between Timestamps and date strings, they should also be allowed between Timestamps and date objects.
Expected Output
date
0 2021-01-01
date
1 2021-01-02
date
0 2021-01-01
date
1 2021-01-02
Empty DataFrame
Columns: [date]
Index: []
date
1 2021-01-02
Output of pd.show_versions()
INSTALLED VERSIONS
------------------
commit : 2cb96529396d93b46abab7bbc73a208e708c642e
python : 3.8.5.final.0
python-bits : 64
OS : Linux
OS-release : 3.10.0-1160.24.1.el7.x86_64
Version : #1 SMP Thu Mar 25 21:21:56 UTC 2021
machine : x86_64
processor :
byteorder : little
LC_ALL : C.UTF-8
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.2.4
numpy : 1.20.3
pytz : 2021.1
dateutil : 2.8.1
pip : 21.1.1
setuptools : 47.1.0
Cython : None
pytest : 5.4.3
hypothesis : None
sphinx : 3.1.2
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.6.3
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 2.11.3
IPython : 7.23.1
pandas_datareader: None
bs4 : None
bottleneck : None
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.4.2
numexpr : None
odfpy : None
openpyxl : 3.0.7
pandas_gbq : None
pyarrow : None
pyxlsb : None
s3fs : None
scipy : 1.6.3
sqlalchemy : 1.4.15
tables : None
tabulate : 0.8.9
xarray : 0.18.0
xlrd : None
xlwt : None
numba : None