Skip to content

BUG: preserve object-dtype index when accessing DataFrame column / PERF: improve perf of Series fastpath constructor #42950

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np

from pandas import (
Index,
NaT,
Series,
date_range,
Expand All @@ -12,20 +13,23 @@


class SeriesConstructor:

params = [None, "dict"]
param_names = ["data"]

def setup(self, data):
def setup(self):
self.idx = date_range(
start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
)
dict_data = dict(zip(self.idx, range(len(self.idx))))
self.data = None if data is None else dict_data
self.data = dict(zip(self.idx, range(len(self.idx))))
self.array = np.array([1, 2, 3])
self.idx2 = Index(["a", "b", "c"])

def time_constructor(self, data):
def time_constructor_dict(self):
Series(data=self.data, index=self.idx)

def time_constructor_no_data(self):
Series(data=None, index=self.idx)

def time_constructor_fastpath(self):
Series(self.array, index=self.idx, name="name", fastpath=True)


class ToFrame:
params = [["int64", "datetime64[ns]", "category", "Int64"], [None, "foo"]]
Expand Down
21 changes: 10 additions & 11 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,12 @@ def __init__(
data = SingleArrayManager.from_array(data, index)

generic.NDFrame.__init__(self, data)
self.name = name
self._set_axis(0, index, fastpath=True)
if fastpath:
# skips validation of the name
object.__setattr__(self, "_name", name)
else:
self.name = name
self._set_axis(0, index)

def _init_dict(
self, data, index: Index | None = None, dtype: DtypeObj | None = None
Expand Down Expand Up @@ -529,15 +533,14 @@ def _constructor_expanddim(self) -> type[DataFrame]:
def _can_hold_na(self) -> bool:
return self._mgr._can_hold_na

def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
def _set_axis(self, axis: int, labels) -> None:
"""
Override generic, we want to set the _typ here.

This is called from the cython code when we set the `index` attribute
directly, e.g. `series.index = [1, 2, 3]`.
"""
if not fastpath:
labels = ensure_index(labels)
labels = ensure_index(labels)

if labels._is_all_dates:
deep_labels = labels
Expand All @@ -549,17 +552,13 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
):
try:
labels = DatetimeIndex(labels)
# need to set here because we changed the index
if fastpath:
self._mgr.set_axis(axis, labels)
except (tslibs.OutOfBoundsDatetime, ValueError):
# labels may exceed datetime bounds,
# or not be a DatetimeIndex
pass

if not fastpath:
# The ensure_index call above ensures we have an Index object
self._mgr.set_axis(axis, labels)
# The ensure_index call above ensures we have an Index object
self._mgr.set_axis(axis, labels)

# ndarray compatibility
@property
Expand Down