Skip to content

ENH: infer_objects copy kwd #50096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Other enhancements
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. The default is 'w'; 'a' can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file. (:issue:`35849`)
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`; passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
-

Expand Down
10 changes: 8 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6316,7 +6316,7 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT:
return self.copy(deep=True)

@final
def infer_objects(self: NDFrameT) -> NDFrameT:
def infer_objects(self: NDFrameT, copy: bool_t = True) -> NDFrameT:
"""
Attempt to infer better dtypes for object columns.

Expand All @@ -6325,6 +6325,12 @@ def infer_objects(self: NDFrameT) -> NDFrameT:
columns unchanged. The inference rules are the
same as during normal Series/DataFrame construction.

Parameters
----------
copy : bool, default True
Whether to make a copy for non-object or non-inferrable columns
or Series.

Returns
-------
converted : same type as input object
Expand Down Expand Up @@ -6354,7 +6360,7 @@ def infer_objects(self: NDFrameT) -> NDFrameT:
A int64
dtype: object
"""
new_mgr = self._mgr.convert()
new_mgr = self._mgr.convert(copy=copy)
return self._constructor(new_mgr).__finalize__(self, method="infer_objects")

@final
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,19 +374,22 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)

def convert(self: T) -> T:
def convert(self: T, copy: bool) -> T:
def _convert(arr):
if is_object_dtype(arr.dtype):
# extract PandasArray for tests that patch PandasArray._typ
arr = np.asarray(arr)
return lib.maybe_convert_objects(
result = lib.maybe_convert_objects(
arr,
convert_datetime=True,
convert_timedelta=True,
convert_period=True,
)
if result is arr and copy:
return arr.copy()
return result
else:
return arr.copy()
return arr.copy() if copy else arr
Comment on lines +377 to +392
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel sorry I was just about to ask - is there a test that hits this?

if not, then is it because the plan is to "kill arraymanager" anyway?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a test that hits this?

TestInferObjects.test_copy should hit this in the CI build specific to ArrayManager

the plan is to "kill arraymanager" (#48880 (comment)) anyway

I'd like this, but have no plans to push for it.


return self.apply(_convert)

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1981,6 +1981,8 @@ def convert(
convert_timedelta=True,
convert_period=True,
)
if copy and res_values is values:
res_values = values.copy()
res_values = ensure_block_shape(res_values, self.ndim)
return [self.make_block(res_values)]

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,10 +441,10 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
return self.apply("astype", dtype=dtype, copy=copy, errors=errors)

def convert(self: T) -> T:
def convert(self: T, copy: bool) -> T:
return self.apply(
"convert",
copy=True,
copy=copy,
)

def replace(self: T, to_replace, value, inplace: bool) -> T:
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,15 +591,15 @@ def _compare(old_mgr, new_mgr):

# noops
mgr = create_mgr("f: i8; g: f8")
new_mgr = mgr.convert()
new_mgr = mgr.convert(copy=True)
_compare(mgr, new_mgr)

# convert
mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
new_mgr = mgr.convert()
new_mgr = mgr.convert(copy=True)
assert new_mgr.iget(0).dtype == np.object_
assert new_mgr.iget(1).dtype == np.object_
assert new_mgr.iget(2).dtype == np.object_
Expand All @@ -612,7 +612,7 @@ def _compare(old_mgr, new_mgr):
mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
new_mgr = mgr.convert()
new_mgr = mgr.convert(copy=True)
assert new_mgr.iget(0).dtype == np.object_
assert new_mgr.iget(1).dtype == np.object_
assert new_mgr.iget(2).dtype == np.object_
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/series/methods/test_infer_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@


class TestInferObjects:
def test_copy(self, index_or_series):
# GH#50096
# case where we don't need to do inference because it is already non-object
obj = index_or_series(np.array([1, 2, 3], dtype="int64"))

result = obj.infer_objects(copy=False)
assert tm.shares_memory(result, obj)

# case where we try to do inference but can't do better than object
obj2 = index_or_series(np.array(["foo", 2], dtype=object))
result2 = obj2.infer_objects(copy=False)
assert tm.shares_memory(result2, obj2)

def test_infer_objects_series(self, index_or_series):
# GH#11221
actual = index_or_series(np.array([1, 2, 3], dtype="O")).infer_objects()
Expand Down