Skip to content

Commit 5063ad9

Browse files
lukemanley authored and phofl committed
PERF: faster constructors from ea scalars (pandas-dev#45854)
1 parent 6ca9f23 commit 5063ad9

File tree

3 files changed: +26 -1 lines changed

asv_bench/benchmarks/frame_ctor.py

+23
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,10 @@
22

33
import pandas as pd
44
from pandas import (
5+
NA,
56
Categorical,
67
DataFrame,
8+
Float64Dtype,
79
MultiIndex,
810
Series,
911
Timestamp,
@@ -138,6 +140,27 @@ def time_frame_from_range(self):
138140
self.df = DataFrame(self.data)
139141

140142

143+
class FromScalar:
144+
def setup(self):
145+
self.nrows = 100_000
146+
147+
def time_frame_from_scalar_ea_float64(self):
148+
DataFrame(
149+
1.0,
150+
index=range(self.nrows),
151+
columns=list("abc"),
152+
dtype=Float64Dtype(),
153+
)
154+
155+
def time_frame_from_scalar_ea_float64_na(self):
156+
DataFrame(
157+
NA,
158+
index=range(self.nrows),
159+
columns=list("abc"),
160+
dtype=Float64Dtype(),
161+
)
162+
163+
141164
class FromArrays:
142165

143166
goal_time = 0.2

doc/source/whatsnew/v1.5.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -244,6 +244,7 @@ Performance improvements
244244
- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`)
245245
- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`)
246246
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`)
247+
- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
247248
-
248249

249250
.. ---------------------------------------------------------------------------

pandas/core/dtypes/cast.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1652,7 +1652,8 @@ def construct_1d_arraylike_from_scalar(
16521652

16531653
if isinstance(dtype, ExtensionDtype):
16541654
cls = dtype.construct_array_type()
1655-
subarr = cls._from_sequence([value] * length, dtype=dtype)
1655+
seq = [] if length == 0 else [value]
1656+
subarr = cls._from_sequence(seq, dtype=dtype).repeat(length)
16561657

16571658
else:
16581659

0 commit comments

Comments (0)