Skip to content

Numindexname #13205

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,8 @@ Bug Fixes
- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`)



- Bug in various index types, which did not propagate the name of the passed index (:issue:`12309`)
- Bug in ``DatetimeIndex``, which did not honour ``copy=True`` (:issue:`13205`)
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)


Expand Down
27 changes: 27 additions & 0 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,33 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
pass
return Index(values, **attributes)

def _deepcopy_if_needed(self, orig, copy=False):
    """
    .. versionadded:: 0.18.2

    Return a deep copy of self if its data coincides (in memory) with orig.
    Subclasses should override this if self._data is not an ndarray.

    Parameters
    ----------
    orig : ndarray or other object
        object to compare self._data against; objects without a ``base``
        attribute (e.g. a list) are compared by identity as they are
    copy : boolean, default False
        when False, do not run any check, just return self

    Returns
    -------
    A copy of self if needed, otherwise self : Index
    """
    if not copy:
        return self

    # Retrieve the "base objects", i.e. the original memory allocations.
    # getattr() is used because ``orig`` is not guaranteed to be an ndarray;
    # plain attribute access raised AttributeError for e.g. a list.
    orig_base = getattr(orig, 'base', None)
    if orig_base is None:
        orig_base = orig
    new = self._data if self._data.base is None else self._data.base
    if orig_base is new:
        return self.copy(deep=True)

    return self

def _update_inplace(self, result, **kwargs):
    """
    Always refuse an in-place update.

    Acts as a guard for calls that arrive through IndexOpsMixin; the
    arguments are ignored.

    Raises
    ------
    TypeError
        unconditionally
    """
    message = "Index can't be updated inplace"
    raise TypeError(message)
Expand Down
3 changes: 3 additions & 0 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
if fastpath:
return cls._simple_new(data, name=name)

if name is None and hasattr(data, 'name'):
name = data.name

if isinstance(data, com.ABCCategorical):
data = cls._create_categorical(cls, data, categories, ordered)
elif isinstance(data, CategoricalIndex):
Expand Down
110 changes: 45 additions & 65 deletions pandas/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,28 @@ class NumericIndex(Index):
"""
_is_numeric_dtype = True

def __new__(cls, data=None, dtype=None, copy=False, name=None,
            fastpath=False):
    """
    Build a numeric index stored with ``cls._default_dtype``.

    Parameters
    ----------
    data : object
        passed to ``cls._coerce_to_ndarray`` (scalars and generators are
        handled there)
    dtype : object, default None
        accepted but not read by this constructor
    copy : boolean, default False
        forwarded to ``np.array`` when a conversion is performed
    name : object, default None
        name of the index; when None, ``data.name`` is used if present
    fastpath : boolean, default False
        when True, hand ``data`` and ``name`` to ``cls._simple_new``
        without any checks
    """
    if fastpath:
        return cls._simple_new(data, name=name)

    # isscalar, generators handled in coerce_to_ndarray
    data = cls._coerce_to_ndarray(data)

    if issubclass(data.dtype.type, compat.string_types):
        cls._string_data_error(data)

    target_dtype = cls._default_dtype
    if not copy and com.is_dtype_equal(data.dtype, target_dtype):
        # already the storage dtype and no copy requested: reuse as is
        subarr = data
    else:
        subarr = np.array(data, dtype=target_dtype, copy=copy)
        cls._assert_safe_casting(data, subarr)

    if name is None:
        if hasattr(data, 'name'):
            name = data.name
    return cls._simple_new(subarr, name=name)

def _maybe_cast_slice_bound(self, label, side, kind):
"""
This function should be overloaded in subclasses that allow non-trivial
Expand Down Expand Up @@ -55,6 +77,15 @@ def _convert_tolerance(self, tolerance):
raise ValueError('tolerance argument for %s must be numeric: %r' %
(type(self).__name__, tolerance))

@classmethod
def _assert_safe_casting(cls, data, subarr):
    """
    Hook for validating a cast from ``data`` to ``subarr``; no-op here.

    Subclasses need to override this only if the process of casting data
    from some accepted dtype to the internal dtype(s) bears the risk of
    truncation (e.g. float to int).
    """
    return None


class Int64Index(NumericIndex):
"""
Expand Down Expand Up @@ -90,29 +121,7 @@ class Int64Index(NumericIndex):

_engine_type = _index.Int64Engine

def __new__(cls, data=None, dtype=None, copy=False, name=None,
            fastpath=False, **kwargs):
    """
    Build an index whose values are stored as ``np.int64``.

    ``dtype`` and ``**kwargs`` are accepted but not read. Non-integer
    input is converted to int64 and rejected with TypeError if any
    element changes value in the conversion.
    """
    if fastpath:
        return cls._simple_new(data, name=name)

    # isscalar, generators handled in coerce_to_ndarray
    data = cls._coerce_to_ndarray(data)
    scalar_type = data.dtype.type

    if issubclass(scalar_type, compat.string_types):
        cls._string_data_error(data)

    elif issubclass(scalar_type, np.integer):
        # integer input: conversion to int64 needs no value check
        subarr = np.array(data, dtype=np.int64, copy=copy)
    else:
        subarr = np.array(data, dtype=np.int64, copy=copy)
        # compare element-wise to detect values altered by the cast
        if len(data) > 0 and (subarr != data).any():
            raise TypeError('Unsafe NumPy casting to integer, you must'
                            ' explicitly cast')

    return cls._simple_new(subarr, name=name)
_default_dtype = np.int64

@property
def inferred_type(self):
Expand Down Expand Up @@ -155,17 +164,22 @@ def equals(self, other):
if self.is_(other):
return True

try:
return com.array_equivalent(com._values_from_object(self),
com._values_from_object(other))
except TypeError:
# e.g. fails in numpy 1.6 with DatetimeIndex #1681
return False
return com.array_equivalent(com._values_from_object(self),
com._values_from_object(other))

def _wrap_joined_index(self, joined, other):
    """
    Wrap ``joined`` in an Int64Index.

    The result keeps the name only when self and other carry equal
    names; otherwise it is unnamed.
    """
    if self.name == other.name:
        name = self.name
    else:
        name = None
    return Int64Index(joined, name=name)

@classmethod
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what the heck is this?

def _assert_safe_casting(cls, data, subarr):
    """
    Ensure incoming data can be represented as ints.

    Integer-typed ``data`` is accepted without comparison; anything else
    must compare equal to ``subarr``.

    Raises
    ------
    TypeError
        if non-integer ``data`` differs from ``subarr``
    """
    if issubclass(data.dtype.type, np.integer):
        return
    if np.array_equal(data, subarr):
        return
    raise TypeError('Unsafe NumPy casting, you must '
                    'explicitly cast')

Int64Index._add_numeric_methods()
Int64Index._add_logical_methods()
Expand Down Expand Up @@ -200,39 +214,7 @@ class Float64Index(NumericIndex):
_inner_indexer = _algos.inner_join_indexer_float64
_outer_indexer = _algos.outer_join_indexer_float64

def __new__(cls, data=None, dtype=None, copy=False, name=None,
            fastpath=False, **kwargs):
    """
    Build an index whose values are stored as ``np.float64``.

    Parameters
    ----------
    data : object
        passed to ``cls._coerce_to_ndarray``
    dtype : object, default None
        requested dtype; integer, object and float kinds are accepted
        and all coerced to float64
    copy : boolean, default False
        forwarded to ``np.array``
    name : object, default None
        name handed to ``cls._simple_new``
    fastpath : boolean, default False
        when True, hand ``data`` and ``name`` to ``cls._simple_new``
        without any checks
    **kwargs
        accepted but not read

    Raises
    ------
    TypeError
        if ``dtype`` is of an unsupported kind, or if converting ``data``
        to the target dtype fails
    """
    if fastpath:
        return cls._simple_new(data, name)

    data = cls._coerce_to_ndarray(data)

    if issubclass(data.dtype.type, compat.string_types):
        cls._string_data_error(data)

    if dtype is None:
        dtype = np.float64
    dtype = np.dtype(dtype)

    # allow integer / object dtypes to be passed, but coerce to float64
    if dtype.kind in ['i', 'O', 'f']:
        dtype = np.float64

    else:
        raise TypeError("cannot support {0} dtype in "
                        "Float64Index".format(dtype))

    try:
        subarr = np.array(data, dtype=dtype, copy=copy)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not turned into TypeError
        raise TypeError('Unsafe NumPy casting, you must explicitly cast')

    # coerce to float64 for storage
    if subarr.dtype != np.float64:
        subarr = subarr.astype(np.float64)

    return cls._simple_new(subarr, name)
_default_dtype = np.float64

@property
def inferred_type(self):
Expand Down Expand Up @@ -339,8 +321,7 @@ def equals(self, other):
return False
left, right = self._values, other._values
return ((left == right) | (self._isnan & other._isnan)).all()
except TypeError:
# e.g. fails in numpy 1.6 with DatetimeIndex #1681
except (TypeError, ValueError):
return False

def __contains__(self, other):
Expand Down Expand Up @@ -392,6 +373,5 @@ def isin(self, values, level=None):
return lib.ismember_nans(np.array(self), value_set,
isnull(list(value_set)).any())


Float64Index._add_numeric_methods()
Float64Index._add_logical_methods_disabled()
2 changes: 2 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,11 +372,13 @@ def test_consolidate_datetime64(self):
ser_starting.index = ser_starting.values
ser_starting = ser_starting.tz_localize('US/Eastern')
ser_starting = ser_starting.tz_convert('UTC')
ser_starting.index.name = 'starting'

ser_ending = df.ending
ser_ending.index = ser_ending.values
ser_ending = ser_ending.tz_localize('US/Eastern')
ser_ending = ser_ending.tz_convert('UTC')
ser_ending.index.name = 'ending'

df.starting = ser_starting.index
df.ending = ser_ending.index
Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,53 @@ def test_hash_error(self):
type(ind).__name__):
hash(ind)

def test_copy_name(self):
# Check that "name" argument passed at initialization is honoured
# GH12309
for name, index in compat.iteritems(self.indices):
if isinstance(index, MultiIndex):
continue

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the equivalent of these 2 tests for the ones you are skipping (e.g. MultiIndex) and PI, RI, MI

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK for PI, not for RI I think (it doesn't have any data to copy), and for MI I have the test ready but at the moment it would fail since this PR doesn't include code for MI. Will include in a separate PR.

first = index.__class__(index, copy=True, name='mario')
second = first.__class__(first, copy=False)

# Even though "copy=False", we want a new object.
self.assertIsNot(first, second)
# Not using tm.assert_index_equal() since names differ:
self.assertTrue(index.equals(first))

self.assertEqual(first.name, 'mario')
self.assertEqual(second.name, 'mario')

s1 = Series(2, index=first)
s2 = Series(3, index=second[:-1])
if not isinstance(index, CategoricalIndex): # See GH13365
s3 = s1 * s2
self.assertEqual(s3.index.name, 'mario')

def test_ensure_copied_data(self):
    # GH12309: the "copy" argument of each Index.__new__ must be honoured
    for name, index in compat.iteritems(self.indices):
        if isinstance(index, PeriodIndex):
            # Needs "freq" specification:
            init_kwargs = {'freq': index.freq}
        elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
            # RangeIndex cannot be initialized from data
            # MultiIndex and CategoricalIndex are tested separately
            continue
        else:
            init_kwargs = {}

        index_type = index.__class__

        # copy=True: equal index, values flagged as a copy
        copied = index_type(index.values, copy=True, **init_kwargs)
        tm.assert_index_equal(index, copied)
        tm.assert_numpy_array_equal(index.values, copied.values,
                                    check_same='copy')

        # copy=False: values flagged as the same data
        shared = index_type(index.values, copy=False, **init_kwargs)
        tm.assert_numpy_array_equal(index.values, shared.values,
                                    check_same='same')

def test_copy_and_deepcopy(self):
from copy import copy, deepcopy

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def test_constructor_from_series(self):
df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
'5-1-1990']
result = DatetimeIndex(df['date'], freq='MS')
expected.name = 'date'
self.assert_index_equal(result, expected)
self.assertEqual(df['date'].dtype, object)

Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,20 @@ def test_identical(self):
self.assertTrue(ci1.identical(ci1.copy()))
self.assertFalse(ci1.identical(ci2))

def test_ensure_copied_data(self):
    # GH12309: the "copy" argument of CategoricalIndex.__new__ must be
    # honoured. Tested separately from the other indexes because, per the
    # original note, the values here are not an ndarray.
    def _base(ar):
        # the object owning the memory: ``ar`` itself when it has no base
        return ar if ar.base is None else ar.base

    for index in self.indices.values():
        copied = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, copied)
        self.assertIsNot(_base(index.values), _base(copied.values))

        shared = CategoricalIndex(index.values, copy=False)
        self.assertIs(_base(index.values), _base(shared.values))

def test_equals(self):

ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ def test_constructor(self):
# explicit construction
index = Float64Index([1, 2, 3, 4, 5])
self.assertIsInstance(index, Float64Index)
self.assertTrue((index.values == np.array(
[1, 2, 3, 4, 5], dtype='float64')).all())
expected = np.array([1, 2, 3, 4, 5], dtype='float64')
self.assert_numpy_array_equal(index.values, expected)
index = Float64Index(np.array([1, 2, 3, 4, 5]))
self.assertIsInstance(index, Float64Index)
index = Float64Index([1., 2, 3, 4, 5])
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,17 @@ def test_numpy_array_equal_object_message(self):
with assertRaisesRegexp(AssertionError, expected):
assert_almost_equal(a, b)

def test_numpy_array_equal_copy_flag(self):
a = np.array([1, 2, 3])
b = a.copy()
c = a.view()
expected = 'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)'
with assertRaisesRegexp(AssertionError, expected):
assert_numpy_array_equal(a, b, check_same='same')
expected = 'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)'
with assertRaisesRegexp(AssertionError, expected):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good

assert_numpy_array_equal(a, c, check_same='copy')

def test_assert_almost_equal_iterable_message(self):

expected = """Iterable are different
Expand Down
18 changes: 12 additions & 6 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,15 @@ def __new__(cls, data=None,
verify_integrity=True, normalize=False,
closed=None, ambiguous='raise', dtype=None, **kwargs):

# This allows to later ensure that the 'copy' parameter is honored:
if isinstance(data, Index):
ref_to_data = data._data
else:
ref_to_data = data

if name is None and hasattr(data, 'name'):
name = data.name

dayfirst = kwargs.pop('dayfirst', None)
yearfirst = kwargs.pop('yearfirst', None)

Expand Down Expand Up @@ -302,7 +311,7 @@ def __new__(cls, data=None,
raise TypeError("Already tz-aware, use tz_convert "
"to convert.")

return data
return data._deepcopy_if_needed(ref_to_data, copy)

if issubclass(data.dtype.type, compat.string_types):
data = tslib.parse_str_array_to_datetime(data, freq=freq,
Expand Down Expand Up @@ -335,10 +344,7 @@ def __new__(cls, data=None,
elif data.dtype == _INT64_DTYPE:
if isinstance(data, Int64Index):
raise TypeError('cannot convert Int64Index->DatetimeIndex')
if copy:
subarr = np.asarray(data, dtype=_NS_DTYPE)
else:
subarr = data.view(_NS_DTYPE)
subarr = data.view(_NS_DTYPE)
else:
if isinstance(data, (ABCSeries, Index)):
values = data._values
Expand Down Expand Up @@ -414,7 +420,7 @@ def __new__(cls, data=None,
if inferred:
subarr.offset = to_offset(inferred)

return subarr
return subarr._deepcopy_if_needed(ref_to_data, copy)

@classmethod
def _generate(cls, start, end, periods, name, offset,
Expand Down
Loading