Skip to content

ENH: add empty() methods for DataFrame and Series #12291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
_maybe_box_datetimelike, is_categorical_dtype, is_object_dtype,
is_internal_type, is_datetimetz, _possibly_infer_to_datetimelike,
_dict_compat)
from pandas.core.dtypes import ExtensionDtype
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
Expand Down Expand Up @@ -771,6 +772,36 @@ def dot(self, other):
else: # pragma: no cover
raise TypeError('unsupported type: %s' % type(other))

@classmethod
def empty(cls, shape, dtype=float):
    """
    Build a DataFrame of the requested shape and dtype whose contents
    are whatever ``numpy.empty`` hands back.

    Parameters
    ----------
    shape : int or tuple of int
        Shape passed straight through to ``numpy.empty``.
    dtype : data-type, optional
        Requested dtype of the resulting frame (``float`` by default).

    Returns
    -------
    out : DataFrame
        Frame wrapping a ``numpy.empty`` buffer of the given shape.

    See Also
    --------
    numpy.empty : allocates an array of given shape and type
    Series.empty : one-dimensional counterpart taking a length

    Notes
    -----
    NOTE(review): pandas objects appear to already expose ``empty`` as a
    boolean property; confirm this classmethod does not shadow it.
    """
    if not ExtensionDtype.is_dtype(dtype):
        # Ordinary dtype: let numpy allocate the buffer directly.
        return cls(np.empty(shape, dtype=dtype))

    # Extension dtype: allocate an object buffer, convert it to nested
    # lists, and hand the dtype to the constructor instead of to numpy
    # (presumably because numpy cannot allocate such dtypes -- confirm).
    placeholder = np.empty(shape, dtype=object).tolist()
    return cls(placeholder, dtype=dtype)

# ----------------------------------------------------------------------
# IO methods (to / from other formats)

Expand Down
31 changes: 31 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
_coerce_to_dtype, SettingWithCopyError,
_maybe_box_datetimelike, ABCDataFrame,
_dict_compat)
from pandas.core.dtypes import ExtensionDtype
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
Float64Index, _ensure_index)
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
Expand Down Expand Up @@ -243,6 +244,36 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
return cls(arr, index=index, name=name, dtype=dtype, copy=copy,
fastpath=fastpath)

@classmethod
def empty(cls, length, dtype=float):
    """
    Build a Series of the requested length and dtype whose contents are
    whatever ``numpy.empty`` hands back.

    Parameters
    ----------
    length : int
        Number of elements, passed straight through to ``numpy.empty``.
    dtype : data-type, optional
        Requested dtype of the resulting Series (``float`` by default).

    Returns
    -------
    out : Series
        Series wrapping a ``numpy.empty`` buffer of the given length.

    See Also
    --------
    numpy.empty : allocates an array of given shape and type
    DataFrame.empty : two-dimensional counterpart taking a shape

    Notes
    -----
    NOTE(review): pandas objects appear to already expose ``empty`` as a
    boolean property; confirm this classmethod does not shadow it.
    """
    if not ExtensionDtype.is_dtype(dtype):
        # Ordinary dtype: let numpy allocate the buffer directly.
        return cls(np.empty(length, dtype=dtype))

    # Extension dtype: allocate an object buffer and let ``from_array``
    # apply the dtype (presumably because numpy cannot allocate such
    # dtypes itself -- confirm).
    placeholder = np.empty(length, dtype=object)
    return cls.from_array(placeholder, dtype=dtype)

@property
def _constructor(self):
    """Return the ``Series`` class itself."""
    return Series
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1995,3 +1995,24 @@ def test_from_records_len0_with_columns(self):
self.assertTrue(np.array_equal(result.columns, ['bar']))
self.assertEqual(len(result), 0)
self.assertEqual(result.index.name, 'foo')

def test_empty(self):
    """Check the shape and per-column dtype of ``DataFrame.empty`` results."""
    from pandas.core.dtypes import DatetimeTZDtype

    # Derive a reference dtype from the values of a tz-aware frame.
    tz_frame = DataFrame({'dt': pd.date_range(
        "2015-01-01", periods=3, tz='Europe/Brussels')})
    reference_dtype = tz_frame.values.dtype

    # Each case: (dtype requested, shape requested, dtype expected).
    cases = (
        (reference_dtype, (4, 8), reference_dtype),
        # dtype=None is expected to give float columns.
        (None, (7, 1), float),
        (np.int64, (3, 5), np.int64),
        # Passing the dtype *class* is expected to give object columns.
        (DatetimeTZDtype, (2, 3), object),
    )

    for requested, shape, expected in cases:
        result = DataFrame.empty(shape, dtype=requested)
        self.assertEqual(result.shape, shape)

        for label in result.columns:
            self.assertEqual(result[label].dtype, expected)
19 changes: 19 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,3 +706,22 @@ def f():
self.assertEqual(s.dtype, 'timedelta64[ns]')
s = Series([pd.NaT, np.nan, '1 Day'])
self.assertEqual(s.dtype, 'timedelta64[ns]')

def test_empty(self):
    """Check the size and dtype of ``Series.empty`` results."""
    from pandas.core.dtypes import DatetimeTZDtype

    # Derive a reference dtype from the values of a tz-aware frame.
    tz_frame = pd.DataFrame({'dt': pd.date_range(
        "2015-01-01", periods=3, tz='Europe/Brussels')})
    reference_dtype = tz_frame.values.dtype

    # Each case: (dtype requested, length requested, dtype expected).
    cases = (
        (reference_dtype, 6, reference_dtype),
        # dtype=None is expected to give a float Series.
        (None, 3, float),
        (np.int64, 10, np.int64),
        # Passing the dtype *class* is expected to give an object Series.
        (DatetimeTZDtype, 5, object),
    )

    for requested, length, expected in cases:
        result = Series.empty(length, dtype=requested)
        self.assertEqual(result.size, length)
        self.assertEqual(result.dtype, expected)