Skip to content

BUG: Common NumericIndex.__new__, with fixed name handling #12331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
if fastpath:
return cls._simple_new(data, name=name)

if name is None and hasattr(data, 'name'):
name = data.name

if isinstance(data, com.ABCCategorical):
data = cls._create_categorical(cls, data, categories, ordered)
elif isinstance(data, CategoricalIndex):
Expand Down
13 changes: 7 additions & 6 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
name=None, **kwargs):

# compat with Index
if name is not None:
names = name
if name is None and hasattr(levels, 'name'):
name = levels.name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how does this check help? levels is a list-like

if isinstance(levels, MultiIndex):
return levels.copy(name=name, names=names, deep=copy)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could also be true I agree, but then it should be before the prior check, yes?
I also really don't like the name, names biz. I believe Index has the right way to do this (IOW check for both name and names if both are specified its an error, otherwise use the non None one)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really following you here... where does Index do this?

if levels is None or labels is None:
raise TypeError("Must pass both levels and labels")
if len(levels) != len(labels):
Expand All @@ -77,8 +79,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
if len(levels) == 1:
if names:
name = names[0]
else:
name = None
return Index(levels[0], name=name, copy=True).take(labels[0])

result = object.__new__(MultiIndex)
Expand Down Expand Up @@ -333,7 +333,7 @@ def set_labels(self, labels, level=None, inplace=False,
labels = property(fget=_get_labels, fset=__set_labels)

def copy(self, names=None, dtype=None, levels=None, labels=None,
deep=False, _set_identity=False):
deep=False, name=None, _set_identity=False):
"""
Make a copy of this object. Names, dtype, levels and labels can be
passed and will be set on new copy.
Expand All @@ -344,6 +344,7 @@ def copy(self, names=None, dtype=None, levels=None, labels=None,
dtype : numpy dtype or pandas type, optional
levels : sequence, optional
labels : sequence, optional
name : object, optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shoul dlook much like the Index one (as far as the name checking goes)


Returns
-------
Expand All @@ -366,7 +367,7 @@ def copy(self, names=None, dtype=None, levels=None, labels=None,
names = self.names
return MultiIndex(levels=levels, labels=labels, names=names,
sortorder=self.sortorder, verify_integrity=False,
_set_identity=_set_identity)
name=name, _set_identity=_set_identity)

def __array__(self, dtype=None):
""" the array interface, return my values """
Expand Down
93 changes: 28 additions & 65 deletions pandas/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,32 @@ class NumericIndex(Index):
"""
_is_numeric_dtype = True

def __new__(cls, data=None, dtype=None, copy=False, name=None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dont' think we need to accept kwargs here (or if we do, then use use something like core.generic._validate_kwargs to allow certain compat keywords (but then we don't advertise them)

(note the location/name of this is changing, but ok for now)

fastpath=False, **kwargs):

if fastpath:
return cls._simple_new(data, name=name)

# isscalar, generators handled in coerce_to_ndarray
data = cls._coerce_to_ndarray(data)

if issubclass(data.dtype.type, compat.string_types):
cls._string_data_error(data)

if copy or data.dtype != cls._default_dtype:
try:
subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
assert((subarr == data) | np.isnan(subarr)).all()
except:
raise TypeError('Unsafe NumPy casting, you must '
'explicitly cast')
else:
subarr = data

if name is None and hasattr(data, 'name'):
name = data.name
return cls._simple_new(subarr, name=name)

def _maybe_cast_slice_bound(self, label, side, kind):
"""
This function should be overloaded in subclasses that allow non-trivial
Expand Down Expand Up @@ -94,35 +120,7 @@ class Int64Index(NumericIndex):
_can_hold_na = False

_engine_type = _index.Int64Engine

def __new__(cls, data=None, dtype=None, copy=False, name=None,
fastpath=False, **kwargs):

if fastpath:
return cls._simple_new(data, name=name)

# isscalar, generators handled in coerce_to_ndarray
data = cls._coerce_to_ndarray(data)

if issubclass(data.dtype.type, compat.string_types):
cls._string_data_error(data)

elif issubclass(data.dtype.type, np.integer):
# don't force the upcast as we may be dealing
# with a platform int
if (dtype is None or
not issubclass(np.dtype(dtype).type, np.integer)):
dtype = np.int64

subarr = np.array(data, dtype=dtype, copy=copy)
else:
subarr = np.array(data, dtype=np.int64, copy=copy)
if len(data) > 0:
if (subarr != data).any():
raise TypeError('Unsafe NumPy casting to integer, you must'
' explicitly cast')

return cls._simple_new(subarr, name=name)
_default_dtype = np.int64

@property
def inferred_type(self):
Expand Down Expand Up @@ -192,42 +190,7 @@ class Float64Index(NumericIndex):
_inner_indexer = _algos.inner_join_indexer_float64
_outer_indexer = _algos.outer_join_indexer_float64

def __new__(cls, data=None, dtype=None, copy=False, name=None,
fastpath=False, **kwargs):

if fastpath:
return cls._simple_new(data, name)

data = cls._coerce_to_ndarray(data)

if issubclass(data.dtype.type, compat.string_types):
cls._string_data_error(data)

if dtype is None:
dtype = np.float64
dtype = np.dtype(dtype)

# allow integer / object dtypes to be passed, but coerce to float64
if dtype.kind in ['i', 'O']:
dtype = np.float64

elif dtype.kind in ['f']:
pass

else:
raise TypeError("cannot support {0} dtype in "
"Float64Index".format(dtype))

try:
subarr = np.array(data, dtype=dtype, copy=copy)
except:
raise TypeError('Unsafe NumPy casting, you must explicitly cast')

# coerce to float64 for storage
if subarr.dtype != np.float64:
subarr = subarr.astype(np.float64)

return cls._simple_new(subarr, name)
_default_dtype = np.float64

@property
def inferred_type(self):
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,11 +372,13 @@ def test_consolidate_datetime64(self):
ser_starting.index = ser_starting.values
ser_starting = ser_starting.tz_localize('US/Eastern')
ser_starting = ser_starting.tz_convert('UTC')
ser_starting.index.name = 'starting'

ser_ending = df.ending
ser_ending.index = ser_ending.values
ser_ending = ser_ending.tz_localize('US/Eastern')
ser_ending = ser_ending.tz_convert('UTC')
ser_ending.index.name = 'ending'

df.starting = ser_starting.index
df.ending = ser_ending.index
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,3 +654,19 @@ def test_fillna(self):
expected[1] = True
self.assert_numpy_array_equal(idx._isnan, expected)
self.assertTrue(idx.hasnans)

def test_copy(self):
# GH12309
for name, index in compat.iteritems(self.indices):
first = index.__class__(index, copy=True, name='mario')
second = first.__class__(first, copy=False)
self.assertTrue(index.equals(first))
# Even though "copy=False", we want a new object:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blank line

self.assertTrue(id(first) != id(second))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use .identical(...)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In place of .equals()?! But the name is different...


if isinstance(index, MultiIndex) and len(index.levels) > 1:
# No unique "name" attribute (each level has its own)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

override this for the MultiIndex tests

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and just skip the MultIndex in the iteration (as you need a more comprehensive MI test for copy)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean by "override"/"skip"?

continue

self.assertEqual(first.name, 'mario')
self.assertEqual(second.name, 'mario')
8 changes: 8 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,3 +831,11 @@ def test_ufunc_coercions(self):
tm.assertIsInstance(result, Float64Index)
exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x')
tm.assert_index_equal(result, exp)

def test_ensure_copied_data(self):
data = np.array([1, 2, 3], dtype='int64')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number

idx = Int64Index(data, copy=True)
self.assert_numpy_array_equal(data, idx._data)
tm.assertIsNot(data, idx._data)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a VERY specific test, can you make this much more general (e.g. use .create_data()) to ensure that this is correct for numeric Index types. Furthermore, would be ideal to make this test for ALL Indexes (IOW put this in TestBase)

idx2 = Int64Index(data, copy=False)
tm.assertIs(data, idx2._data)
4 changes: 2 additions & 2 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,9 +725,9 @@ def test_ensure_platform_int():
pi = com._ensure_platform_int(x)
assert (pi.dtype == np.int_)

# int32
# int32 - "dtype" argument is irrelevant
x = Int64Index([1, 2, 3], dtype='int32')
assert (x.dtype == np.int32)
assert (x.dtype == np.int64)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think let's raise on a passed dtype, see what breaks (raise a TypeError)


pi = com._ensure_platform_int(x)
assert (pi.dtype == np.int_)
Expand Down
3 changes: 3 additions & 0 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ def __new__(cls, data=None,
verify_integrity=True, normalize=False,
closed=None, ambiguous='raise', dtype=None, **kwargs):

if name is None and hasattr(data, 'name'):
name = data.name

dayfirst = kwargs.pop('dayfirst', None)
yearfirst = kwargs.pop('yearfirst', None)

Expand Down
3 changes: 3 additions & 0 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
raise ValueError('Periods must be a number, got %s' %
str(periods))

if name is None and hasattr(data, 'name'):
name = data.name

if data is None:
if ordinal is not None:
data = np.asarray(ordinal, dtype=np.int64)
Expand Down
5 changes: 3 additions & 2 deletions pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ def __new__(cls, data=None, unit=None,

if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
data = data.copy()
return data
return data.copy()
else:
return data._shallow_copy()

freq_infer = False
if not isinstance(freq, DateOffset):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1649,7 +1649,7 @@ def test_join_self(self):
kinds = 'outer', 'inner', 'left', 'right'
for kind in kinds:
joined = index.join(index, how=kind)
self.assertIs(index, joined)
tm.assert_index_equal(index, joined)

def test_factorize(self):
idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day',
Expand Down