
CLN: Assorted Cleanups #27791

Merged · 21 commits · Aug 8, 2019
17 changes: 7 additions & 10 deletions pandas/core/arrays/categorical.py
@@ -22,7 +22,6 @@
ensure_int64,
ensure_object,
ensure_platform_int,
is_categorical,
is_categorical_dtype,
is_datetime64_dtype,
is_datetimelike,
@@ -2659,18 +2658,18 @@ def _get_codes_for_values(values, categories):
return coerce_indexer_dtype(t.lookup(vals), cats)


def _recode_for_categories(codes, old_categories, new_categories):
def _recode_for_categories(codes: np.ndarray, old_categories, new_categories):
"""
Convert a set of codes for to a new set of categories

Parameters
----------
codes : array
codes : np.ndarray
old_categories, new_categories : Index

Returns
-------
new_codes : array
new_codes : np.ndarray[np.int64]

Examples
--------
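The helper's own Examples body is elided in the diff above. As a rough public-API illustration of what "recoding codes for new categories" means (not code from this PR), `Categorical.set_categories` performs the same kind of remapping:

```python
import pandas as pd

cat = pd.Categorical(["b", "a", "c"])          # categories ['a', 'b', 'c'], codes [1, 0, 2]
recoded = cat.set_categories(["c", "b", "a"])  # same values, new category order

# The integer codes are remapped to point at the new categories; values missing
# from the new categories would map to the sentinel -1.
print(cat.codes)      # [1 0 2]
print(recoded.codes)  # [1 2 0]
```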
@@ -2725,17 +2724,15 @@ def _factorize_from_iterable(values):
If `values` has a categorical dtype, then `categories` is
a CategoricalIndex keeping the categories and order of `values`.
"""
from pandas.core.indexes.category import CategoricalIndex

if not is_list_like(values):
raise TypeError("Input must be list-like")

if is_categorical(values):
values = CategoricalIndex(values)
# The CategoricalIndex level we want to build has the same categories
if is_categorical_dtype(values):
Contributor:
IIRC @jschendel, didn't we just change this for a perf fix? Is this change compatible with that?

Member Author:
I put together this version by looking at the old version and taking out the non-executed paths, so this shouldn't take a hit.

Member:
@jreback: I think you're referring to #27669, which should not be impacted by this.

Contributor:
OT: we might be able to remove is_categorical entirely?

Member Author:
Yeah, we should. It will need to be deprecated since it's in the API.

values = extract_array(values)
# The Categorical we want to build has the same categories
# as values but its codes are by def [0, ..., len(n_categories) - 1]
cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype)
categories = values._create_from_codes(cat_codes)
categories = Categorical.from_codes(cat_codes, dtype=values.dtype)
codes = values.codes
else:
# The value of ordered is irrelevant since we don't use cat as such,
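To make the changed branch concrete: the old `is_categorical(values)` check (which accepted a Categorical or anything with a categorical dtype) is replaced by an `is_categorical_dtype` check plus unwrapping to the underlying Categorical, with the category index now built via `Categorical.from_codes`. A rough sketch using public accessors in place of the private helpers (not code from this PR; pandas 0.25-era API assumed):

```python
import numpy as np
import pandas as pd

values = pd.Series(pd.Categorical(["a", "b", "a"]))
cat = values.array  # the underlying Categorical (what extract_array returns here)

# The Categorical being built has the same categories as `values`, and its codes
# are by definition 0, 1, ..., n_categories - 1.
cat_codes = np.arange(len(cat.categories), dtype=cat.codes.dtype)
categories = pd.Categorical.from_codes(cat_codes, dtype=cat.dtype)
codes = cat.codes
```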
12 changes: 5 additions & 7 deletions pandas/core/arrays/datetimelike.py
@@ -161,8 +161,8 @@ def strftime(self, date_format):

Returns
-------
Index
Index of formatted strings.
ndarray
NumPy ndarray of formatted strings.

See Also
--------
@@ -180,9 +180,7 @@ def strftime(self, date_format):
'March 10, 2018, 09:00:02 AM'],
dtype='object')
"""
from pandas import Index

return Index(self._format_native_types(date_format=date_format))
return self._format_native_types(date_format=date_format).astype(object)


class TimelikeOps:
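The docstring change above matches the behavior change: the array-level strftime now returns a plain object-dtype ndarray instead of wrapping the result in an Index (the Index-level method still re-boxes it via the delegation changes further down). A minimal sketch, assuming pandas 0.25-era APIs:

```python
import numpy as np
import pandas as pd

rng = pd.date_range("2018-03-10 09:00", periods=3, freq="s")
arr = rng.array  # the backing DatetimeArray

formatted = arr.strftime("%B %d, %Y, %r")
assert isinstance(formatted, np.ndarray) and formatted.dtype == object

# End users going through the Index still get an Index of strings.
rng.strftime("%B %d, %Y, %r")
```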
@@ -1018,9 +1016,9 @@ def _add_delta_tdi(self, other):

if isinstance(other, np.ndarray):
# ndarray[timedelta64]; wrap in TimedeltaIndex for op
from pandas import TimedeltaIndex
from pandas.core.arrays import TimedeltaArray

other = TimedeltaIndex(other)
other = TimedeltaArray._from_sequence(other)
Member:
If other is a timedelta64 dtype, the _from_sequence is not needed (and rather confusing to the reader, IMO).

Member Author:
It is timedelta64, but not necessarily timedelta64[ns].

Member:
@jbrockmendel I think we should avoid using _from_sequence for that in our code base. Maybe array(.., dtype='timedelta64[ns]') instead?

Member Author:
Why do you want to avoid _from_sequence?


self_i8 = self.asi8
other_i8 = other.asi8
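Picking up the thread above: the incoming ndarray is timedelta64, but not necessarily timedelta64[ns], so the wrap has to include a unit conversion. A hedged sketch (not from this PR) of the _from_sequence route used in the diff and the public pandas.array alternative raised in review:

```python
import numpy as np
import pandas as pd
from pandas.core.arrays import TimedeltaArray

other = np.array([1, 2, 3], dtype="timedelta64[s]")  # seconds, not nanoseconds

# Route taken in the diff: the private constructor coerces to timedelta64[ns].
wrapped = TimedeltaArray._from_sequence(other)
assert wrapped.dtype == np.dtype("timedelta64[ns]")

# Public-API alternative suggested by the reviewer (explicit target dtype).
alt = pd.array(other, dtype="timedelta64[ns]")
```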
8 changes: 4 additions & 4 deletions pandas/core/arrays/sparse.py
@@ -1781,11 +1781,11 @@ def sparse_arithmetic_method(self, other):

@classmethod
def _create_comparison_method(cls, op):
def cmp_method(self, other):
op_name = op.__name__
op_name = op.__name__
if op_name in {"and_", "or_"}:
op_name = op_name[:-1]

if op_name in {"and_", "or_"}:
op_name = op_name[:-1]
def cmp_method(self, other):

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
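The reordering above moves the operator-name normalization out of the per-call closure so it runs once when the comparison method is created, rather than on every call. A generic sketch of the resulting shape (the `_compare` dispatch helper is hypothetical, not pandas code):

```python
def _create_comparison_method(cls, op):
    op_name = op.__name__
    if op_name in {"and_", "or_"}:
        op_name = op_name[:-1]  # "and_" -> "and", "or_" -> "or"

    def cmp_method(self, other):
        # op_name was resolved once above; nothing is recomputed per call.
        return self._compare(op_name, other)  # hypothetical dispatch helper

    cmp_method.__name__ = f"__{op_name}__"
    return cmp_method
```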
3 changes: 1 addition & 2 deletions pandas/core/indexes/datetimes.py
@@ -69,7 +69,7 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin):
# Some are "raw" methods, the result is not not re-boxed in an Index
# We also have a few "extra" attrs, which may or may not be raw,
# which we we dont' want to expose in the .dt accessor.
_extra_methods = ["to_period", "to_perioddelta", "to_julian_date"]
_extra_methods = ["to_period", "to_perioddelta", "to_julian_date", "strftime"]
_extra_raw_methods = ["to_pydatetime", "_local_timestamps", "_has_same_tz"]
_extra_raw_properties = ["_box_func", "tz", "tzinfo"]
_delegated_properties = DatetimeArray._datetimelike_ops + _extra_raw_properties
@@ -1184,7 +1184,6 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) # type: ignore
_resolution = cache_readonly(DatetimeArray._resolution.fget) # type: ignore

strftime = ea_passthrough(DatetimeArray.strftime)
_has_same_tz = ea_passthrough(DatetimeArray._has_same_tz)

@property
5 changes: 4 additions & 1 deletion pandas/core/indexes/period.py
@@ -63,7 +63,10 @@ class PeriodDelegateMixin(DatetimelikeDelegateMixin):

_delegate_class = PeriodArray
_delegated_properties = PeriodArray._datetimelike_ops
_delegated_methods = set(PeriodArray._datetimelike_methods) | {"_addsub_int_array"}
_delegated_methods = set(PeriodArray._datetimelike_methods) | {
"_addsub_int_array",
"strftime",
}
_raw_properties = {"is_leap_year"}


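With strftime added to the delegated methods here (and to _extra_methods for DatetimeIndex above), the explicit ea_passthrough binding that was removed is no longer needed, and the user-facing call is unchanged. A quick check, assuming pandas 0.25-era behavior:

```python
import pandas as pd

pi = pd.period_range("2019-01", periods=3, freq="M")
# strftime now reaches PeriodArray.strftime through the delegation machinery.
pi.strftime("%Y-%m")  # Index(['2019-01', '2019-02', '2019-03'], dtype='object')
```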
6 changes: 4 additions & 2 deletions pandas/io/pytables.py
@@ -3202,7 +3202,9 @@ def read(self, start=None, stop=None, **kwargs):
values = self.read_array(
"block{idx}_values".format(idx=i), start=_start, stop=_stop
)
blk = make_block(values, placement=items.get_indexer(blk_items))
blk = make_block(
values, placement=items.get_indexer(blk_items), ndim=len(axes)
)
blocks.append(blk)

return self.obj_type(BlockManager(blocks, axes))
@@ -4462,7 +4464,7 @@ def read(self, where=None, columns=None, **kwargs):
if values.ndim == 1 and isinstance(values, np.ndarray):
values = values.reshape((1, values.shape[0]))

block = make_block(values, placement=np.arange(len(cols_)))
block = make_block(values, placement=np.arange(len(cols_)), ndim=2)
mgr = BlockManager([block], [cols_, index_])
frames.append(DataFrame(mgr))

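Both hunks above pass ndim to make_block explicitly instead of letting it be inferred from the values and placement. A rough sketch of the call pattern (internal API; the signature shown is an assumption based on pandas 0.25):

```python
import numpy as np
from pandas.core.internals import make_block

values = np.arange(6).reshape(2, 3)  # two "columns" of three rows each
blk = make_block(values, placement=np.arange(2), ndim=2)
assert blk.ndim == 2
```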
14 changes: 14 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
@@ -462,6 +462,13 @@ def test_concat_same_type_different_freq(self):

tm.assert_datetime_array_equal(result, expected)

def test_strftime(self, datetime_index):
arr = DatetimeArray(datetime_index)

result = arr.strftime("%Y %b")
expected = np.array(datetime_index.strftime("%Y %b"))
tm.assert_numpy_array_equal(result, expected)


class TestTimedeltaArray(SharedTests):
index_cls = pd.TimedeltaIndex
@@ -652,6 +659,13 @@ def test_array_interface(self, period_index):
expected = np.asarray(arr).astype("S20")
tm.assert_numpy_array_equal(result, expected)

def test_strftime(self, period_index):
arr = PeriodArray(period_index)

result = arr.strftime("%Y")
expected = np.array(period_index.strftime("%Y"))
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize(
"array,casting_nats",
2 changes: 0 additions & 2 deletions pandas/tests/arrays/test_integer.py
@@ -379,8 +379,6 @@ def test_compare_array(self, data, all_compare_operators):


class TestCasting:
pass

@pytest.mark.parametrize("dropna", [True, False])
def test_construct_index(self, all_data, dropna):
# ensure that we do not coerce to Float64Index, rather
3 changes: 1 addition & 2 deletions pandas/tests/series/test_period.py
@@ -71,10 +71,9 @@ def test_NaT_scalar(self):
series[2] = val
assert pd.isna(series[2])

@pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
def test_NaT_cast(self):
result = Series([np.nan]).astype("period[D]")
expected = Series([pd.NaT])
expected = Series([pd.NaT], dtype="period[D]")
Member:
This is changing a test, no? Why are we dropping the xfail?

Member Author:
IIUC the xfail was from a time before dtype="period[D]" was recognized.

tm.assert_series_equal(result, expected)

def test_set_none(self):
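For context on the dropped xfail: the "period[D]" dtype string is recognized by astype in current pandas, which is what the updated expectation asserts. A minimal check, assuming pandas 0.25-era behavior:

```python
import numpy as np
import pandas as pd

result = pd.Series([np.nan]).astype("period[D]")
result.dtype    # period[D]
result.iloc[0]  # NaT
```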