diff --git a/.gitignore b/.gitignore
index ff0a6aef47163..b1748ae72b8ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,3 +106,4 @@ doc/build/html/index.html
 doc/tmp.sv
 doc/source/styled.xlsx
 doc/source/templates/
+env/
diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index 0cdfec63fd696..cd3cc282a8010 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -348,7 +348,7 @@ The following methods are available:
 The weights used in the window are specified by the ``win_type`` keyword.
 The list of recognized types are the `scipy.signal window functions
- <https://docs.scipy.org/doc/scipy/reference/signal.html#window-functions>`__:
+<https://docs.scipy.org/doc/scipy/reference/signal.html#window-functions>`__:
 
 - ``boxcar``
 - ``triang``
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 4024414610a82..ba33c449e701f 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4548,11 +4548,8 @@ dtypes, including extension dtypes such as datetime with tz.
 
 Several caveats.
 
-- The format will NOT write an ``Index``, or ``MultiIndex`` for the
-  ``DataFrame`` and will raise an error if a non-default one is provided. You
-  can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
-  ignore it.
 - Duplicate column names and non-string column names are not supported
+- Index level names, if specified, must be strings
 - Categorical dtypes can be serialized to parquet, but will de-serialize as
   ``object`` dtype.
 - Non supported types include ``Period`` and actual python object types. These
   will raise a helpful error message on an attempt at serialization.
diff --git a/doc/source/options.rst b/doc/source/options.rst
index db3380bd4a3e7..505a5ade68de0 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -433,7 +433,7 @@ compute.use_numexpr                     True         Use the numexpr library to
                                                      computation if it is installed.
 plotting.matplotlib.register_converters True         Register custom converters with
                                                      matplotlib. Set to False to
                                                      de-register.
-======================================= ============ ========================================
+======================================= ============ ==================================
 
 .. _basics.console_output:
diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
index 00726a4606cf7..206dabd1142ae 100644
--- a/doc/source/whatsnew/v0.21.1.txt
+++ b/doc/source/whatsnew/v0.21.1.txt
@@ -3,9 +3,23 @@ v0.21.1
 -------
 
-This is a minor release from 0.21.1 and includes a number of deprecations, new
-features, enhancements, and performance improvements along with a large number
-of bug fixes. We recommend that all users upgrade to this version.
+This is a minor bug-fix release in the 0.21.x series and includes some small
+regression fixes, bug fixes and performance improvements.
+We recommend that all users upgrade to this version.
+
+Highlights include:
+
+- Temporarily restore matplotlib datetime plotting functionality. This should
+  resolve issues for users who relied implicitly on pandas to plot datetimes
+  with matplotlib. See :ref:`here <whatsnew_0211.special>`.
+- Improvements to the Parquet IO functions introduced in 0.21.0. See
+  :ref:`here <whatsnew_0211.enhancements.parquet>`.
+
+
+.. contents:: What's new in v0.21.1
+    :local:
+    :backlinks: none
+
 
 .. _whatsnew_0211.special:
 
@@ -42,9 +56,16 @@ registering them when they want them.
 
 New features
 ~~~~~~~~~~~~
 
--
--
--
+.. _whatsnew_0211.enhancements.parquet:
+
+Improvements to the Parquet IO functionality
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- :func:`DataFrame.to_parquet` will now write non-default indexes when the
+  underlying engine supports it.
+  The indexes will be preserved when reading back in with
+  :func:`read_parquet` (:issue:`18581`).
+- :func:`read_parquet` now allows specifying the columns to read from a
+  parquet file (:issue:`18154`)
+- :func:`read_parquet` now allows specifying kwargs which are passed to the
+  respective engine (:issue:`18216`)
 
 .. _whatsnew_0211.enhancements.other:
 
@@ -53,7 +74,6 @@ Other Enhancements
 
 - :meth:`Timestamp.timestamp` is now available in Python 2.7. (:issue:`17329`)
 - :class:`Grouper` and :class:`TimeGrouper` now have a friendly repr output (:issue:`18203`).
--
 
 .. _whatsnew_0211.deprecations:
 
@@ -69,17 +89,6 @@ Performance Improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 - Improved performance of plotting large series/dataframes (:issue:`18236`).
--
--
-
-.. _whatsnew_0211.docs:
-
-Documentation Changes
-~~~~~~~~~~~~~~~~~~~~~
-
--
--
--
 
 .. _whatsnew_0211.bug_fixes:
 
@@ -94,7 +103,7 @@ Conversion
 
 - Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`)
 - Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising ``TypeError`` (:issue:`18372`)
 - Bug in :class:`DateTimeIndex` and :meth:`date_range` where mismatching tz-aware ``start`` and ``end`` timezones would not raise an error if ``end.tzinfo`` is None (:issue:`18431`)
--
+- Bug in :meth:`Series.fillna` which raised when passed a long integer on Python 2 (:issue:`18159`).
 
 Indexing
 ^^^^^^^^
@@ -104,7 +113,6 @@ Indexing
 
 - Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`)
 - Bug in ``Index.putmask`` when an invalid mask was passed (:issue:`18368`)
 - Bug in masked assignment of a ``timedelta64[ns]`` dtype ``Series``, incorrectly coerced to float (:issue:`18493`)
--
 
 I/O
 ^^^
@@ -114,21 +122,19 @@ I/O
 
 - Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`)
 - Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`)
 - Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
-- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`)
-- :func:`read_parquet` now allows to specify kwargs which are passed to the respective engine (:issue:`18216`)
 - Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`).
 - Bug in :meth:`DataFrame.to_msgpack` when serializing data of the ``numpy.bool_`` datatype (:issue:`18390`)
 - Bug in :func:`read_json` not decoding when reading line-delimited JSON from S3 (:issue:`17200`)
 - Bug in :func:`pandas.io.json.json_normalize` to avoid modification of ``meta`` (:issue:`18610`)
 - Bug in :func:`to_latex` where repeated multi-index values were not printed even though a higher level index differed from the previous row (:issue:`14484`)
+- Bug when reading NaN-only categorical columns in :class:`HDFStore` (:issue:`18413`)
+- Bug in :meth:`DataFrame.to_latex` with ``longtable=True`` where a latex multicolumn always spanned over three columns (:issue:`17959`)
 
 Plotting
 ^^^^^^^^
 
 - Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not pickleable in Python 3 (:issue:`18439`)
--
--
 
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -137,15 +143,6 @@ Groupby/Resample/Rolling
 
 - Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequency is 12h or higher (:issue:`15549`)
 - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
 - Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`)
--
--
-
-Sparse
-^^^^^^
-
--
--
--
 
 Reshaping
 ^^^^^^^^^
@@ -159,9 +156,8 @@ Numeric
 ^^^^^^^
 
 - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has a floating point issue (:issue:`18044`)
--
--
--
+- Bug in :class:`TimedeltaIndex` where subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`)
+- Bug in :class:`DatetimeIndex` where subtracting a datetime-like from a ``DatetimeIndex`` could fail to raise ``OverflowError`` (:issue:`18020`)
 
 Categorical
 ^^^^^^^^^^^
@@ -177,9 +173,3 @@ String
 ^^^^^^
 
 - :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`)
-
-Other
-^^^^^
-
--
--
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 71de6c7c3e8cf..4e9b2b9a2e922 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -681,7 +681,7 @@ def __sub__(self, other):
                 return self._add_delta(-other)
             elif is_integer(other):
                 return self.shift(-other)
-            elif isinstance(other, datetime):
+            elif isinstance(other, (datetime, np.datetime64)):
                 return self._sub_datelike(other)
             elif isinstance(other, Period):
                 return self._sub_period(other)
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 50085889ad88f..3c518017a8808 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -31,6 +31,7 @@
 import pandas.core.dtypes.concat as _concat
 from pandas.errors import PerformanceWarning
 from pandas.core.common import _values_from_object, _maybe_box
+from pandas.core.algorithms import checked_add_with_arr
 
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.core.indexes.numeric import Int64Index, Float64Index
@@ -762,7 +763,7 @@ def _sub_datelike(self, other):
                 raise TypeError("DatetimeIndex subtraction must have the same "
                                 "timezones or no timezones")
             result = self._sub_datelike_dti(other)
-        elif isinstance(other, datetime):
+        elif isinstance(other, (datetime, np.datetime64)):
             other = Timestamp(other)
             if other is libts.NaT:
                 result = self._nat_new(box=False)
@@ -772,7 +773,8 @@ def _sub_datelike(self, other):
                                 "timezones or no timezones")
             else:
                 i8 = self.asi8
-                result = i8 - other.value
+                result = checked_add_with_arr(i8, -other.value,
+                                              arr_mask=self._isnan)
                 result = self._maybe_mask_results(result,
                                                   fill_value=libts.iNaT)
         else:
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index 445adb6bd3b18..0cc35300f0d17 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -381,7 +381,8 @@ def _add_datelike(self, other):
         else:
             other = Timestamp(other)
             i8 = self.asi8
-            result = checked_add_with_arr(i8, other.value)
+            result = checked_add_with_arr(i8, other.value,
+                                          arr_mask=self._isnan)
             result = self._maybe_mask_results(result, fill_value=iNaT)
         return DatetimeIndex(result, name=self.name, copy=False)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index b929dfd5a9d0b..3b7cd1d02e1d3 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1837,8 +1837,10 @@ def _can_hold_element(self, element):
         if tipo is not None:
             return (issubclass(tipo.type, (np.floating, np.integer)) and
                     not issubclass(tipo.type, (np.datetime64, np.timedelta64)))
-        return (isinstance(element, (float, int, np.floating, np.int_)) and
-                not isinstance(element, (bool, np.bool_, datetime, timedelta,
+        return (
+            isinstance(
+                element, (float, int, np.floating, np.int_, compat.long))
+            and not isinstance(element, (bool, np.bool_, datetime, timedelta,
                                          np.datetime64, np.timedelta64)))
 
     def to_native_types(self, slicer=None, na_rep='', float_format=None,
@@ -1886,9 +1888,11 @@ def _can_hold_element(self, element):
         if tipo is not None:
             return issubclass(tipo.type,
                               (np.floating, np.integer, np.complexfloating))
-        return (isinstance(element,
-                           (float, int, complex, np.float_, np.int_)) and
-                not isinstance(element, (bool, np.bool_)))
+        return (
+            isinstance(
+                element,
+                (float, int, complex, np.float_, np.int_, compat.long))
+            and not isinstance(element, (bool, np.bool_)))
 
     def should_store(self, value):
         return issubclass(value.dtype.type, np.complexfloating)
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 24eeb1dd94c18..bac5ac762400d 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -950,8 +950,8 @@ def get_col_type(dtype):
             if self.longtable:
                 buf.write('\\endhead\n')
                 buf.write('\\midrule\n')
-                buf.write('\\multicolumn{3}{r}{{Continued on next '
-                          'page}} \\\\\n')
+                buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next '
+                          'page}}}} \\\\\n'.format(n=len(row)))
                 buf.write('\\midrule\n')
                 buf.write('\\endfoot\n\n')
                 buf.write('\\bottomrule\n')
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 4a13d2c9db944..eaaa14e756e22 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -3,7 +3,8 @@
 from warnings import catch_warnings
 from distutils.version import LooseVersion
 from pandas import DataFrame, RangeIndex, Int64Index, get_option
-from pandas.compat import range
+from pandas.compat import string_types
+from pandas.core.common import AbstractMethodError
 from pandas.io.common import get_filepath_or_buffer
 
 
@@ -25,6 +26,11 @@ def get_engine(engine):
         except ImportError:
             pass
 
+        raise ImportError("Unable to find a usable engine; "
+                          "tried using: 'pyarrow', 'fastparquet'.\n"
+                          "pyarrow or fastparquet is required for parquet "
+                          "support")
+
     if engine not in ['pyarrow', 'fastparquet']:
         raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
 
@@ -34,37 +40,75 @@ def get_engine(engine):
     return FastParquetImpl()
 
 
-class PyArrowImpl(object):
+class BaseImpl(object):
+
+    api = None  # module
+
+    @staticmethod
+    def validate_dataframe(df):
+
+        if not isinstance(df, DataFrame):
+            raise ValueError("to_parquet only supports IO with DataFrames")
ValueError("to_parquet only supports IO with DataFrames") + + # must have value column names (strings only) + if df.columns.inferred_type not in {'string', 'unicode'}: + raise ValueError("parquet must have string column names") + + # index level names must be strings + valid_names = all( + isinstance(name, string_types) + for name in df.index.names + if name is not None + ) + if not valid_names: + raise ValueError("Index level names must be strings") + + def write(self, df, path, compression, **kwargs): + raise AbstractMethodError(self) + + def read(self, path, columns=None, **kwargs): + raise AbstractMethodError(self) + + +class PyArrowImpl(BaseImpl): def __init__(self): # since pandas is a dependency of pyarrow # we need to import on first use - try: import pyarrow import pyarrow.parquet except ImportError: - raise ImportError("pyarrow is required for parquet support\n\n" - "you can install via conda\n" - "conda install pyarrow -c conda-forge\n" - "\nor via pip\n" - "pip install -U pyarrow\n") - + raise ImportError( + "pyarrow is required for parquet support\n\n" + "you can install via conda\n" + "conda install pyarrow -c conda-forge\n" + "\nor via pip\n" + "pip install -U pyarrow\n" + ) if LooseVersion(pyarrow.__version__) < '0.4.1': - raise ImportError("pyarrow >= 0.4.1 is required for parquet" - "support\n\n" - "you can install via conda\n" - "conda install pyarrow -c conda-forge\n" - "\nor via pip\n" - "pip install -U pyarrow\n") - - self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0' - self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0' + raise ImportError( + "pyarrow >= 0.4.1 is required for parquet support\n\n" + "you can install via conda\n" + "conda install pyarrow -c conda-forge\n" + "\nor via pip\n" + "pip install -U pyarrow\n" + ) + + self._pyarrow_lt_060 = ( + LooseVersion(pyarrow.__version__) < LooseVersion('0.6.0')) + self._pyarrow_lt_070 = ( + LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0')) + self.api = pyarrow def write(self, df, path, compression='snappy', coerce_timestamps='ms', **kwargs): + self.validate_dataframe(df) + if self._pyarrow_lt_070: + self._validate_write_lt_070(df) path, _, _ = get_filepath_or_buffer(path) + if self._pyarrow_lt_060: table = self.api.Table.from_pandas(df, timestamps_to_ms=True) self.api.parquet.write_table( @@ -78,36 +122,75 @@ def write(self, df, path, compression='snappy', def read(self, path, columns=None, **kwargs): path, _, _ = get_filepath_or_buffer(path) + if self._pyarrow_lt_070: + return self.api.parquet.read_pandas(path, columns=columns, + **kwargs).to_pandas() + kwargs['use_pandas_metadata'] = True return self.api.parquet.read_table(path, columns=columns, **kwargs).to_pandas() - -class FastParquetImpl(object): + def _validate_write_lt_070(self, df): + # Compatibility shim for pyarrow < 0.7.0 + # TODO: Remove in pandas 0.22.0 + from pandas.core.indexes.multi import MultiIndex + if isinstance(df.index, MultiIndex): + msg = ( + "Multi-index DataFrames are only supported " + "with pyarrow >= 0.7.0" + ) + raise ValueError(msg) + # Validate index + if not isinstance(df.index, Int64Index): + msg = ( + "pyarrow < 0.7.0 does not support serializing {} for the " + "index; you can .reset_index() to make the index into " + "column(s), or install the latest version of pyarrow or " + "fastparquet." 
+            )
+            raise ValueError(msg.format(type(df.index)))
+        if not df.index.equals(RangeIndex(len(df))):
+            raise ValueError(
+                "pyarrow < 0.7.0 does not support serializing a non-default "
+                "index; you can .reset_index() to make the index into "
+                "column(s), or install the latest version of pyarrow or "
+                "fastparquet."
+            )
+        if df.index.name is not None:
+            raise ValueError(
+                "pyarrow < 0.7.0 does not serialize indexes with a name; you "
+                "can set the index.name to None or install the latest version "
+                "of pyarrow or fastparquet."
+            )
+
+
+class FastParquetImpl(BaseImpl):
 
     def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use
-
        try:
            import fastparquet
        except ImportError:
-            raise ImportError("fastparquet is required for parquet support\n\n"
-                              "you can install via conda\n"
-                              "conda install fastparquet -c conda-forge\n"
-                              "\nor via pip\n"
-                              "pip install -U fastparquet")
-
+            raise ImportError(
+                "fastparquet is required for parquet support\n\n"
+                "you can install via conda\n"
+                "conda install fastparquet -c conda-forge\n"
+                "\nor via pip\n"
+                "pip install -U fastparquet"
+            )
        if LooseVersion(fastparquet.__version__) < '0.1.0':
-            raise ImportError("fastparquet >= 0.1.0 is required for parquet "
-                              "support\n\n"
-                              "you can install via conda\n"
-                              "conda install fastparquet -c conda-forge\n"
-                              "\nor via pip\n"
-                              "pip install -U fastparquet")
-
+            raise ImportError(
+                "fastparquet >= 0.1.0 is required for parquet "
+                "support\n\n"
+                "you can install via conda\n"
+                "conda install fastparquet -c conda-forge\n"
+                "\nor via pip\n"
+                "pip install -U fastparquet"
+            )
        self.api = fastparquet
 
     def write(self, df, path, compression='snappy', **kwargs):
+        self.validate_dataframe(df)
        # thriftpy/protocol/compact.py:339:
        # DeprecationWarning: tostring() is deprecated.
        # Use tobytes() instead.
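
[Editor's note, not part of the patch] The parquet changes above centralize validation in ``BaseImpl.validate_dataframe`` and let sufficiently new engines serialize non-default indexes. A minimal sketch of the round-trip this enables, assuming pyarrow >= 0.7.0 is installed and using a hypothetical local path ``example.parquet``:

```python
import pandas as pd

# A non-default integer index with a string level name: previously
# rejected by to_parquet, now written by engines that support it.
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']},
                  index=pd.Index([10, 20, 30], name='key'))

df.to_parquet('example.parquet', engine='pyarrow')

# New in this patch: read a subset of columns; any extra kwargs are
# forwarded to the underlying engine.
result = pd.read_parquet('example.parquet', columns=['a'])
assert list(result.index) == [10, 20, 30]  # the index round-trips
```

With pyarrow < 0.7.0 the same ``to_parquet`` call would instead raise one of the ``_validate_write_lt_070`` errors shown above.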
@@ -118,7 +201,8 @@ def write(self, df, path, compression='snappy', **kwargs):
 
     def read(self, path, columns=None, **kwargs):
         path, _, _ = get_filepath_or_buffer(path)
-        return self.api.ParquetFile(path).to_pandas(columns=columns, **kwargs)
+        parquet_file = self.api.ParquetFile(path)
+        return parquet_file.to_pandas(columns=columns, **kwargs)
 
 
 def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
@@ -139,43 +223,7 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
     kwargs
         Additional keyword arguments passed to the engine
     """
 
     impl = get_engine(engine)
-
-    if not isinstance(df, DataFrame):
-        raise ValueError("to_parquet only support IO with DataFrames")
-
-    valid_types = {'string', 'unicode'}
-
-    # validate index
-    # --------------
-
-    # validate that we have only a default index
-    # raise on anything else as we don't serialize the index
-
-    if not isinstance(df.index, Int64Index):
-        raise ValueError("parquet does not support serializing {} "
-                         "for the index; you can .reset_index()"
-                         "to make the index into column(s)".format(
-                             type(df.index)))
-
-    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
-        raise ValueError("parquet does not support serializing a "
-                         "non-default index for the index; you "
-                         "can .reset_index() to make the index "
-                         "into column(s)")
-
-    if df.index.name is not None:
-        raise ValueError("parquet does not serialize index meta-data on a "
-                         "default index")
-
-    # validate columns
-    # ----------------
-
-    # must have value column names (strings only)
-    if df.columns.inferred_type not in valid_types:
-        raise ValueError("parquet must have string column names")
-
     return impl.write(df, path, compression=compression, **kwargs)
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 40955c50f6b5f..2a1aaf2f66469 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2137,10 +2137,17 @@ def convert(self, values, nan_rep, encoding):
             # if we have stored a NaN in the categories
             # then strip it; in theory we could have BOTH
             # -1s in the codes and nulls :<
-            mask = isna(categories)
-            if mask.any():
-                categories = categories[~mask]
-                codes[codes != -1] -= mask.astype(int).cumsum().values
+            if categories is None:
+                # Handle the case of NaN-only categorical columns: the
+                # categories are an empty array; when this is stored,
+                # pytables cannot write a zero-length array, so on readback
+                # the categories would be None and `read_hdf()` would fail.
+                categories = Index([], dtype=np.float64)
+            else:
+                mask = isna(categories)
+                if mask.any():
+                    categories = categories[~mask]
+                    codes[codes != -1] -= mask.astype(int).cumsum().values
 
             self.data = Categorical.from_codes(codes,
                                                categories=categories,
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index 8d9ac59cf9883..20a9916ad6bc4 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -211,6 +211,40 @@ def test_ufunc_coercions(self):
         tm.assert_index_equal(result, exp)
         assert result.freq == 'D'
 
+    def test_datetimeindex_sub_timestamp_overflow(self):
+        dtimax = pd.to_datetime(['now', pd.Timestamp.max])
+        dtimin = pd.to_datetime(['now', pd.Timestamp.min])
+
+        tsneg = Timestamp('1950-01-01')
+        ts_neg_variants = [tsneg,
+                           tsneg.to_pydatetime(),
+                           tsneg.to_datetime64().astype('datetime64[ns]'),
+                           tsneg.to_datetime64().astype('datetime64[D]')]
+
+        tspos = Timestamp('1980-01-01')
+        ts_pos_variants = [tspos,
+                           tspos.to_pydatetime(),
+                           tspos.to_datetime64().astype('datetime64[ns]'),
+                           tspos.to_datetime64().astype('datetime64[D]')]
+
+        for variant in ts_neg_variants:
+            with pytest.raises(OverflowError):
+                dtimax - variant
+
+        expected = pd.Timestamp.max.value - tspos.value
+        for variant in ts_pos_variants:
+            res = dtimax - variant
+            assert res[1].value == expected
+
+        expected = pd.Timestamp.min.value - tsneg.value
+        for variant in ts_neg_variants:
+            res = dtimin - variant
+            assert res[1].value == expected
+
+        for variant in ts_pos_variants:
+            with pytest.raises(OverflowError):
+                dtimin - variant
+
     def test_week_of_month_frequency(self):
         # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
         d1 = date(2002, 9, 1)
diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py
index f4f669ee1d087..3cf56dc5115c2 100644
--- a/pandas/tests/indexes/timedeltas/test_ops.py
+++ b/pandas/tests/indexes/timedeltas/test_ops.py
@@ -1282,3 +1282,23 @@ def test_add_overflow(self):
         result = (to_timedelta([pd.NaT, '5 days', '1 hours']) +
                   to_timedelta(['7 seconds', pd.NaT, '4 hours']))
         tm.assert_index_equal(result, exp)
+
+    def test_timedeltaindex_add_timestamp_nat_masking(self):
+        # GH17991 checking for overflow-masking with NaT
+        tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])
+
+        tsneg = Timestamp('1950-01-01')
+        ts_neg_variants = [tsneg,
+                           tsneg.to_pydatetime(),
+                           tsneg.to_datetime64().astype('datetime64[ns]'),
+                           tsneg.to_datetime64().astype('datetime64[D]')]
+
+        tspos = Timestamp('1980-01-01')
+        ts_pos_variants = [tspos,
+                           tspos.to_pydatetime(),
+                           tspos.to_datetime64().astype('datetime64[ns]'),
+                           tspos.to_datetime64().astype('datetime64[D]')]
+
+        for variant in ts_neg_variants + ts_pos_variants:
+            res = tdinat + variant
+            assert res[1] is pd.NaT
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index c182db35c0c89..4e59779cb9b47 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -1245,7 +1245,9 @@ class TestCanHoldElement(object):
     @pytest.mark.parametrize('value, dtype', [
         (1, 'i8'),
         (1.0, 'f8'),
+        (2**63, 'f8'),
         (1j, 'complex128'),
+        (2**63, 'complex128'),
         (True, 'bool'),
         (np.timedelta64(20, 'ns'), '
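
[Editor's note, not part of the patch] The two index tests above pin down the ``checked_add_with_arr`` masking semantics added in this patch. A condensed sketch of the user-visible behavior, under the same assumptions as those tests (pandas with this patch applied):

```python
import pandas as pd

# NaT slots are masked out of the overflow check, so adding a Timestamp
# to a TimedeltaIndex containing NaT no longer overflows spuriously:
tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])
res = tdinat + pd.Timestamp('1980-01-01')
assert res[1] is pd.NaT

# while a genuinely out-of-bounds DatetimeIndex subtraction now raises:
dtimax = pd.to_datetime(['now', pd.Timestamp.max])
try:
    dtimax - pd.Timestamp('1950-01-01')
except OverflowError:
    print('overflow correctly detected')
```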