From 2fbb61ccd44cd72d642646aa45517b5afad6651b Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Thu, 11 Feb 2016 10:27:31 +0000
Subject: [PATCH] ENH: add empty() methods for DataFrame and Series

Added empty() methods to the Series and DataFrame classes analogous to
the empty() function in the numpy library that can also accept scipy
duck-type dtypes in addition to numpy dtypes.
---
Review notes (this region is not part of the commit message):

* NOTE(review): pandas documents `DataFrame.empty` / `Series.empty` as an
  instance property that reports whether the object has no elements.  A
  classmethod of the same name defined on the subclasses would presumably
  shadow that property, so `if df.empty:` would then test a bound method.
  Confirm against pandas/core/generic.py before merging.
* NOTE(review): the message above mentions "scipy duck-type dtypes", but
  the added code only special-cases `pandas.core.dtypes.ExtensionDtype`.
  Confirm the intended wording.
* The one-line docstring summaries and one "See Also" entry were longer
  than 79 columns once indented; they are shortened in place, so the
  per-hunk added-line counts and the diffstat are unchanged.
* Line breaks and leading whitespace of unchanged context lines were
  reconstructed from a whitespace-collapsed copy of this patch.  Verify
  them against the tree (or apply with `git apply --ignore-whitespace`).

 pandas/core/frame.py                     | 31 ++++++++++++++++++++++++
 pandas/core/series.py                    | 31 ++++++++++++++++++++++++
 pandas/tests/frame/test_constructors.py  | 21 ++++++++++++++++
 pandas/tests/series/test_constructors.py | 19 +++++++++++++++
 4 files changed, 102 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 324f30ed00bed..a73b5a9736e07 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -29,6 +29,7 @@
     _maybe_box_datetimelike, is_categorical_dtype, is_object_dtype,
     is_internal_type, is_datetimetz, _possibly_infer_to_datetimelike,
     _dict_compat)
+from pandas.core.dtypes import ExtensionDtype
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
@@ -771,6 +772,36 @@ def dot(self, other):
         else:  # pragma: no cover
             raise TypeError('unsupported type: %s' % type(other))
 
+    @classmethod
+    def empty(cls, shape, dtype=float):
+        """
+        Return an uninitialized DataFrame of the given shape and dtype.
+
+        Parameters
+        ----------
+        shape : int or tuple of int
+            Shape of the empty DataFrame
+        dtype : data-type, optional
+            Desired output data-type
+
+        Returns
+        -------
+        out : DataFrame
+            DataFrame of uninitialized (arbitrary) data
+            with the given shape and dtype.
+
+        See Also
+        --------
+        numpy.empty : initializes an empty array of given shape and type
+        Series.empty : initializes an empty Series of given length and type
+
+        """
+        if ExtensionDtype.is_dtype(dtype):
+            return cls(np.empty(shape, dtype=object).tolist(), dtype=dtype)
+
+        else:
+            return cls(np.empty(shape, dtype=dtype))
+
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 286a2ba79585a..a0b08bac6a6dd 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -26,6 +26,7 @@
                                 _coerce_to_dtype, SettingWithCopyError,
                                 _maybe_box_datetimelike, ABCDataFrame,
                                 _dict_compat)
+from pandas.core.dtypes import ExtensionDtype
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                Float64Index, _ensure_index)
 from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
@@ -243,6 +244,36 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
         return cls(arr, index=index, name=name, dtype=dtype, copy=copy,
                    fastpath=fastpath)
 
+    @classmethod
+    def empty(cls, length, dtype=float):
+        """
+        Return an uninitialized Series of the given length and dtype.
+
+        Parameters
+        ----------
+        length : int
+            Length of the empty Series
+        dtype : data-type, optional
+            Desired output data-type
+
+        Returns
+        -------
+        out : Series
+            Series of uninitialized (arbitrary) data
+            with the given length and dtype.
+
+        See Also
+        --------
+        numpy.empty : initializes an empty array of given shape and type
+        DataFrame.empty : builds an empty DataFrame of given shape and type
+
+        """
+        if ExtensionDtype.is_dtype(dtype):
+            return cls.from_array(np.empty(length, dtype=object), dtype=dtype)
+
+        else:
+            return cls(np.empty(length, dtype=dtype))
+
     @property
     def _constructor(self):
         return Series
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 87c263e129361..1e8560aa77bf7 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1995,3 +1995,24 @@ def test_from_records_len0_with_columns(self):
         self.assertTrue(np.array_equal(result.columns, ['bar']))
         self.assertEqual(len(result), 0)
         self.assertEqual(result.index.name, 'foo')
+
+    def test_empty(self):
+        from pandas.core.dtypes import DatetimeTZDtype
+
+        df = DataFrame({'dt': pd.date_range(
+            "2015-01-01", periods=3, tz='Europe/Brussels')})
+        dt = df.values.dtype
+
+        params = [
+            (dt, (4, 8), dt),
+            (None, (7, 1), float),
+            (np.int64, (3, 5), np.int64),
+            (DatetimeTZDtype, (2, 3), object),
+        ]
+
+        for in_dt, shape, out_dt in params:
+            df = DataFrame.empty(shape, dtype=in_dt)
+            self.assertEqual(df.shape, shape)
+
+            for col in df.columns:
+                self.assertEqual(df[col].dtype, out_dt)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index c5783779c67c8..f7af65f97415c 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -706,3 +706,22 @@ def f():
         self.assertEqual(s.dtype, 'timedelta64[ns]')
         s = Series([pd.NaT, np.nan, '1 Day'])
         self.assertEqual(s.dtype, 'timedelta64[ns]')
+
+    def test_empty(self):
+        from pandas.core.dtypes import DatetimeTZDtype
+
+        df = pd.DataFrame({'dt': pd.date_range(
+            "2015-01-01", periods=3, tz='Europe/Brussels')})
+        dt = df.values.dtype
+
+        params = [
+            (dt, 6, dt),
+            (None, 3, float),
+            (np.int64, 10, np.int64),
+            (DatetimeTZDtype, 5, object),
+        ]
+
+        for in_dt, length, out_dt in params:
+            s = Series.empty(length, dtype=in_dt)
+            self.assertEqual(s.size, length)
+            self.assertEqual(s.dtype, out_dt)