Skip to content

Commit afca9f8

Browse files
authored
ENH: infer_objects copy kwd (#50096)
* ENH: infer_objects copy kwd * GH ref
1 parent 7c0278e commit afca9f8

File tree

7 files changed

+35
-10
lines changed

7 files changed

+35
-10
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Other enhancements
8484
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
8585
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
8686
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
87+
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`; passing ``False`` will avoid making copies for Series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
8788
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
8889
-
8990

pandas/core/generic.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -6316,7 +6316,7 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT:
63166316
return self.copy(deep=True)
63176317

63186318
@final
6319-
def infer_objects(self: NDFrameT) -> NDFrameT:
6319+
def infer_objects(self: NDFrameT, copy: bool_t = True) -> NDFrameT:
63206320
"""
63216321
Attempt to infer better dtypes for object columns.
63226322
@@ -6325,6 +6325,12 @@ def infer_objects(self: NDFrameT) -> NDFrameT:
63256325
columns unchanged. The inference rules are the
63266326
same as during normal Series/DataFrame construction.
63276327
6328+
Parameters
6329+
----------
6330+
copy : bool, default True
6331+
Whether to make a copy for non-object or non-inferrable columns
6332+
or Series.
6333+
63286334
Returns
63296335
-------
63306336
converted : same type as input object
@@ -6354,7 +6360,7 @@ def infer_objects(self: NDFrameT) -> NDFrameT:
63546360
A int64
63556361
dtype: object
63566362
"""
6357-
new_mgr = self._mgr.convert()
6363+
new_mgr = self._mgr.convert(copy=copy)
63586364
return self._constructor(new_mgr).__finalize__(self, method="infer_objects")
63596365

63606366
@final

pandas/core/internals/array_manager.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -374,19 +374,22 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
374374
def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
375375
return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)
376376

377-
def convert(self: T) -> T:
377+
def convert(self: T, copy: bool) -> T:
378378
def _convert(arr):
379379
if is_object_dtype(arr.dtype):
380380
# extract PandasArray for tests that patch PandasArray._typ
381381
arr = np.asarray(arr)
382-
return lib.maybe_convert_objects(
382+
result = lib.maybe_convert_objects(
383383
arr,
384384
convert_datetime=True,
385385
convert_timedelta=True,
386386
convert_period=True,
387387
)
388+
if result is arr and copy:
389+
return arr.copy()
390+
return result
388391
else:
389-
return arr.copy()
392+
return arr.copy() if copy else arr
390393

391394
return self.apply(_convert)
392395

pandas/core/internals/blocks.py

+2
Original file line numberDiff line numberDiff line change
@@ -1981,6 +1981,8 @@ def convert(
19811981
convert_timedelta=True,
19821982
convert_period=True,
19831983
)
1984+
if copy and res_values is values:
1985+
res_values = values.copy()
19841986
res_values = ensure_block_shape(res_values, self.ndim)
19851987
return [self.make_block(res_values)]
19861988

pandas/core/internals/managers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,10 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
441441
def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
442442
return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
443443

444-
def convert(self: T) -> T:
444+
def convert(self: T, copy: bool) -> T:
445445
return self.apply(
446446
"convert",
447-
copy=True,
447+
copy=copy,
448448
)
449449

450450
def replace(self: T, to_replace, value, inplace: bool) -> T:

pandas/tests/internals/test_internals.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -591,15 +591,15 @@ def _compare(old_mgr, new_mgr):
591591

592592
# noops
593593
mgr = create_mgr("f: i8; g: f8")
594-
new_mgr = mgr.convert()
594+
new_mgr = mgr.convert(copy=True)
595595
_compare(mgr, new_mgr)
596596

597597
# convert
598598
mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
599599
mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
600600
mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
601601
mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
602-
new_mgr = mgr.convert()
602+
new_mgr = mgr.convert(copy=True)
603603
assert new_mgr.iget(0).dtype == np.object_
604604
assert new_mgr.iget(1).dtype == np.object_
605605
assert new_mgr.iget(2).dtype == np.object_
@@ -612,7 +612,7 @@ def _compare(old_mgr, new_mgr):
612612
mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
613613
mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
614614
mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
615-
new_mgr = mgr.convert()
615+
new_mgr = mgr.convert(copy=True)
616616
assert new_mgr.iget(0).dtype == np.object_
617617
assert new_mgr.iget(1).dtype == np.object_
618618
assert new_mgr.iget(2).dtype == np.object_

pandas/tests/series/methods/test_infer_objects.py

+13
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@
44

55

66
class TestInferObjects:
7+
def test_copy(self, index_or_series):
8+
# GH#50096
9+
# case where we don't need to do inference because it is already non-object
10+
obj = index_or_series(np.array([1, 2, 3], dtype="int64"))
11+
12+
result = obj.infer_objects(copy=False)
13+
assert tm.shares_memory(result, obj)
14+
15+
# case where we try to do inference but can't do better than object
16+
obj2 = index_or_series(np.array(["foo", 2], dtype=object))
17+
result2 = obj2.infer_objects(copy=False)
18+
assert tm.shares_memory(result2, obj2)
19+
720
def test_infer_objects_series(self, index_or_series):
821
# GH#11221
922
actual = index_or_series(np.array([1, 2, 3], dtype="O")).infer_objects()

0 commit comments

Comments
 (0)