From fbf5e03c0de3b67eb926609139c6b73bccf2cf05 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 14 Oct 2021 09:57:08 -0500 Subject: [PATCH] feat: rename dbtime and dbdate dtypes to avoid future conflicts with pandas --- db_dtypes/__init__.py | 4 +- samples/snippets/pandas_date_and_time.py | 12 ++---- samples/snippets/pandas_date_and_time_test.py | 4 +- tests/unit/test_arrow.py | 43 +++++++++++-------- tests/unit/test_date.py | 4 +- tests/unit/test_dtypes.py | 34 +++++++-------- tests/unit/test_time.py | 4 +- 7 files changed, 54 insertions(+), 51 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index c2e91a1..bce2bf0 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -34,8 +34,8 @@ from db_dtypes import core -date_dtype_name = "date" -time_dtype_name = "time" +date_dtype_name = "dbdate" +time_dtype_name = "dbtime" pandas_release = packaging.version.parse(pandas.__version__).release diff --git a/samples/snippets/pandas_date_and_time.py b/samples/snippets/pandas_date_and_time.py index d6b4029..bcb654d 100644 --- a/samples/snippets/pandas_date_and_time.py +++ b/samples/snippets/pandas_date_and_time.py @@ -14,16 +14,13 @@ def pandas_date_and_time(): - # fmt: off # [START bigquery_date_create] import datetime import pandas as pd import db_dtypes # noqa import to register dtypes - dates = pd.Series( - [datetime.date(2021, 9, 17), '2021-9-18'], - dtype='date') + dates = pd.Series([datetime.date(2021, 9, 17), "2021-9-18"], dtype="dbdate") # [END bigquery_date_create] # [START bigquery_date_as_datetime] @@ -33,7 +30,7 @@ def pandas_date_and_time(): # [END bigquery_date_as_datetime] # [START bigquery_date_sub] - dates2 = pd.Series(['2021-1-1', '2021-1-2'], dtype='date') + dates2 = pd.Series(["2021-1-1", "2021-1-2"], dtype="dbdate") diffs = dates - dates2 # [END bigquery_date_sub] @@ -46,9 +43,7 @@ def pandas_date_and_time(): # [END bigquery_date_do] # [START bigquery_time_create] - times = pd.Series( - [datetime.time(1, 2, 3, 456789), '12:00:00.6'], - dtype='time') + times = pd.Series([datetime.time(1, 2, 3, 456789), "12:00:00.6"], dtype="dbtime") # [END bigquery_time_create] # [START bigquery_time_as_timedelta] @@ -67,7 +62,6 @@ def pandas_date_and_time(): combined = dates + times # [END bigquery_combine2_date_time] - # fmt: on return ( dates, diff --git a/samples/snippets/pandas_date_and_time_test.py b/samples/snippets/pandas_date_and_time_test.py index b6735c6..6f78240 100644 --- a/samples/snippets/pandas_date_and_time_test.py +++ b/samples/snippets/pandas_date_and_time_test.py @@ -35,7 +35,7 @@ def test_pandas_date_and_time(): combined0, ) = pandas_date_and_time() - assert str(dates.dtype) == "date" + assert str(dates.dtype) == "dbdate" assert list(dates) == [datetime.date(2021, 9, 17), datetime.date(2021, 9, 18)] assert np.array_equal( @@ -45,7 +45,7 @@ def test_pandas_date_and_time(): assert np.array_equal(after, dates.astype("object") + do) assert np.array_equal(before, dates.astype("object") - do) - assert str(times.dtype) == "time" + assert str(times.dtype) == "dbtime" assert list(times) == [ datetime.time(1, 2, 3, 456789), datetime.time(12, 0, 0, 600000), diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index dd0aed7..d3745ea 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -25,14 +25,14 @@ @pytest.mark.parametrize( ("series", "expected"), ( - (pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date32())), + (pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date32())), ( - pandas.Series([None, None, None], dtype="date"), + pandas.Series([None, None, None], dtype="dbdate"), pyarrow.array([None, None, None], type=pyarrow.date32()), ), ( pandas.Series( - [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date" + [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate" ), pyarrow.array( [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], @@ -42,21 +42,25 @@ ( pandas.Series( [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], - dtype="date", + dtype="dbdate", ), pyarrow.array( [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], type=pyarrow.date32(), ), ), - (pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time64("ns"))), ( - pandas.Series([None, None, None], dtype="time"), + pandas.Series([], dtype="dbtime"), + pyarrow.array([], type=pyarrow.time64("ns")), + ), + ( + pandas.Series([None, None, None], dtype="dbtime"), pyarrow.array([None, None, None], type=pyarrow.time64("ns")), ), ( pandas.Series( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time" + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], + dtype="dbtime", ), pyarrow.array( [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], @@ -70,7 +74,7 @@ dt.time(12, 30, 15, 125_000), dt.time(23, 59, 59, 999_999), ], - dtype="time", + dtype="dbtime", ), pyarrow.array( [ @@ -91,14 +95,14 @@ def test_to_arrow(series, expected): @pytest.mark.parametrize( ("series", "expected"), ( - (pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date64())), + (pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date64())), ( - pandas.Series([None, None, None], dtype="date"), + pandas.Series([None, None, None], dtype="dbdate"), pyarrow.array([None, None, None], type=pyarrow.date64()), ), ( pandas.Series( - [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date" + [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate" ), pyarrow.array( [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], @@ -108,21 +112,25 @@ def test_to_arrow(series, expected): ( pandas.Series( [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], - dtype="date", + dtype="dbdate", ), pyarrow.array( [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], type=pyarrow.date64(), ), ), - (pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time32("ms"))), ( - pandas.Series([None, None, None], dtype="time"), + pandas.Series([], dtype="dbtime"), + pyarrow.array([], type=pyarrow.time32("ms")), + ), + ( + pandas.Series([None, None, None], dtype="dbtime"), pyarrow.array([None, None, None], type=pyarrow.time32("ms")), ), ( pandas.Series( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], dtype="time" + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], + dtype="dbtime", ), pyarrow.array( [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], @@ -131,7 +139,8 @@ def test_to_arrow(series, expected): ), ( pandas.Series( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time" + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], + dtype="dbtime", ), pyarrow.array( [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], @@ -145,7 +154,7 @@ def test_to_arrow(series, expected): dt.time(12, 30, 15, 125_000), dt.time(23, 59, 59, 999_999), ], - dtype="time", + dtype="dbtime", ), pyarrow.array( [ diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index 71e704a..c919f6d 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -38,7 +38,7 @@ ], ) def test_date_parsing(value, expected): - assert pandas.Series([value], dtype="date")[0] == expected + assert pandas.Series([value], dtype="dbdate")[0] == expected @pytest.mark.parametrize( @@ -59,4 +59,4 @@ def test_date_parsing(value, expected): ) def test_date_parsing_errors(value, error): with pytest.raises(ValueError, match=error): - pandas.Series([value], dtype="date") + pandas.Series([value], dtype="dbdate") diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index a514c47..aacbf0b 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -23,17 +23,17 @@ pandas_release = packaging.version.parse(pd.__version__).release SAMPLE_RAW_VALUES = dict( - date=(datetime.date(2021, 2, 2), "2021-2-3", None), - time=(datetime.time(1, 2, 2), "1:2:3.5", None), + dbdate=(datetime.date(2021, 2, 2), "2021-2-3", None), + dbtime=(datetime.time(1, 2, 2), "1:2:3.5", None), ) SAMPLE_VALUES = dict( - date=( + dbdate=( datetime.date(2021, 2, 2), datetime.date(2021, 2, 3), datetime.date(2021, 2, 4), datetime.date(2021, 2, 5), ), - time=( + dbtime=( datetime.time(1, 2, 2), datetime.time(1, 2, 3, 500000), datetime.time(1, 2, 4, 500000), @@ -41,13 +41,13 @@ ), ) SAMPLE_DT_VALUES = dict( - date=( + dbdate=( "2021-02-02T00:00:00.000000", "2021-02-03T00:00:00.000000", "2021-02-04T00:00:00.000000", "2021-02-05T00:00:00.000000", ), - time=( + dbtime=( "1970-01-01T01:02:02.000000", "1970-01-01T01:02:03.500000", "1970-01-01T01:02:04.500000", @@ -55,7 +55,7 @@ ), ) -for_date_and_time = pytest.mark.parametrize("dtype", ["date", "time"]) +for_date_and_time = pytest.mark.parametrize("dtype", ["dbdate", "dbtime"]) def eq_na(a1, a2): @@ -72,7 +72,7 @@ def register_dtype(): def _cls(dtype): import db_dtypes - return getattr(db_dtypes, dtype.capitalize() + "Array") + return getattr(db_dtypes, dtype[2:].capitalize() + "Array") def _make_one(dtype): @@ -322,7 +322,7 @@ def test_take(dtype, allow_fill, fill_value): if fill_value == 42: fill_value = expected_fill = ( datetime.date(1971, 4, 2) - if dtype == "date" + if dtype == "dbdate" else datetime.time(0, 42, 42, 424242) ) else: @@ -441,7 +441,7 @@ def test_astype_copy(dtype): ], ) def test_asdatetime(dtype, same): - a = _make_one("date") + a = _make_one("dbdate") for dt in dtype, np.dtype(dtype) if dtype != "datetime" else dtype: if same: b = a.astype(dt, copy=False) @@ -480,7 +480,7 @@ def test_astimedelta(dtype): .astype("timedelta64[ns]" if dtype == "timedelta" else dtype) ) - a = _cls("time")([t, None]) + a = _cls("dbtime")([t, None]) b = a.astype(dtype) np.array_equal(b[:1], expect) assert pd.isna(b[1]) and str(b[1]) == "NaT" @@ -526,7 +526,7 @@ def test_min_max_median(dtype): if pandas_release >= (1, 2): assert ( a.median() == datetime.time(1, 2, 4) - if dtype == "time" + if dtype == "dbtime" else datetime.date(2021, 2, 3) ) @@ -553,14 +553,14 @@ def test_min_max_median(dtype): if pandas_release >= (1, 2): assert ( a.median() == datetime.time(1, 2, 2, 750000) - if dtype == "time" + if dtype == "dbtime" else datetime.date(2021, 2, 2) ) def test_date_add(): - dates = _cls("date")(SAMPLE_VALUES["date"]) - times = _cls("time")(SAMPLE_VALUES["time"]) + dates = _cls("dbdate")(SAMPLE_VALUES["dbdate"]) + times = _cls("dbtime")(SAMPLE_VALUES["dbtime"]) expect = dates.astype("datetime64") + times.astype("timedelta64") assert np.array_equal(dates + times, expect) @@ -592,8 +592,8 @@ def test_date_add(): def test_date_sub(): - dates = _cls("date")(SAMPLE_VALUES["date"]) - dates2 = _cls("date")( + dates = _cls("dbdate")(SAMPLE_VALUES["dbdate"]) + dates2 = _cls("dbdate")( ( datetime.date(2021, 1, 2), datetime.date(2021, 1, 3), diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index 4a6adc8..ba45949 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -61,7 +61,7 @@ ], ) def test_time_parsing(value, expected): - assert pandas.Series([value], dtype="time")[0] == expected + assert pandas.Series([value], dtype="dbtime")[0] == expected @pytest.mark.parametrize( @@ -81,4 +81,4 @@ def test_time_parsing(value, expected): ) def test_time_parsing_errors(value, error): with pytest.raises(ValueError, match=error): - pandas.Series([value], dtype="time") + pandas.Series([value], dtype="dbtime")