Skip to content

BUG: need better inference for path in Series construction (GH9456) #9924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
Parameters
----------
data : array-like, dict, or scalar value
Contains data stored in Series
Contains data stored in Series. If a dict and no index is provided,
an attempt will be made to sort the dict.
index : array-like or Index (1d)
Values must be unique and hashable, same length as data. Index
object (or other iterable of same length as data) Will default to
Expand Down Expand Up @@ -131,6 +132,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,

else:

original_index = index
if index is not None:
index = _ensure_index(index)

Expand Down Expand Up @@ -162,21 +164,27 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
elif isinstance(data, dict):
if index is None:
if isinstance(data, OrderedDict):
index = Index(data)
original_index = data.keys()
else:
index = Index(_try_sort(data))
original_index = _try_sort(data)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it necessary to use a new variable here? The original code looks fine.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we cannot always reorder the dict using the Index (because of incompatible data types), I keep the original index so that I can fall back to it when the Index cannot be used.

index = Index(original_index)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to use _ensure_index, but it does not create a MultiIndex when the dict keys are tuples. Bug?


try:
if isinstance(index, DatetimeIndex):
# coerce back to datetime objects for lookup
data = lib.fast_multiget(data, index.astype('O'),
default=np.nan)
elif isinstance(index, PeriodIndex):
data = [data.get(i, nan) for i in index]
# lib.fast_multiget raises TypeError if type(data) != dict

if lib.infer_dtype(data) == lib.infer_dtype(index.values):
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here I try to avoid a call to np.array() by checking whether we can access the data values using index.values. However, I checked the code for infer_dtype and it may be quite complex. Maybe we should just go straight to the else branch?

data = lib.fast_multiget(data, index.values, default=np.nan)
else:
data = lib.fast_multiget(data, index.values,
default=np.nan)
except TypeError:
data = [data.get(i, nan) for i in index]
if isinstance(original_index, PeriodIndex):
data = [data.get(i, np.nan) for i in original_index]
else:
# np.array(['z', ('a', 'b')]) raises ValueError;
# this may happen with MultiIndex.
data = lib.fast_multiget(data, np.array(original_index),
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If lib.fast_multiget supported a list as its second parameter, we could save the call to np.array.

default=np.nan)
except (TypeError, ValueError) as e:
data = [data.get(i, np.nan) for i in index]

elif isinstance(data, SingleBlockManager):
if index is None:
Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,49 @@ def test_constructor_dtype_datetime64(self):
dr = date_range('20130101',periods=3,tz='US/Eastern')
self.assertTrue(str(Series(dr).iloc[0].tz) == 'US/Eastern')

# GH 9456
d = {np.datetime64('2015-01-07T02:00:00.000000000+0200'): 4.2,
np.datetime64('2015-01-09T02:00:00.000000000+0200'): 4.0,
np.datetime64('2015-01-08T02:00:00.000000000+0200'): 3.9,
np.datetime64('2015-01-12T02:00:00.000000000+0200'): 3.5}
keys = list()
vals = list()
for k in sorted(d.keys()):
vals.append(d[k])
keys.append(k)
expected = Series(vals, keys)
expected_unsorted = Series(d.values(), d.keys())

s = Series(d)
assert_series_equal(s, expected)

s = Series(d, d.keys())
assert_series_equal(s, expected_unsorted)

d = {datetime(2015,1,7): 4.2,
datetime(2015,1,9): 4.0,
datetime(2015,1,8): 3.9,
datetime(2015,1,12): 3.5}
expected_unsorted = Series(d.values(), d.keys())

s = Series(d)
assert_series_equal(s, expected)

s = Series(d, d.keys())
assert_series_equal(s, expected_unsorted)

d = {datetime(2015,1,7): 4.2,
np.datetime64('2015-01-09T02:00:00.000000000+0200'): 4.0,
'20150108': 3.9,
np.datetime64('2015-01-12T02:00:00.000000000+0200'): 3.5}
expected_unsorted = Series(d.values(), d.keys())

s = Series(d)
assert_series_equal(s, expected_unsorted)

s = Series(d, d.keys())
assert_series_equal(s, expected_unsorted)

def test_constructor_periodindex(self):
# GH7932
# converting a PeriodIndex when put in a Series
Expand Down