Skip to content

Commit c5396f6

Browse files
jreback authored and TomAugspurger committed
API/PERF: add policy argument to constructors, pandas-dev#10556
- closes pandas-dev#10556, add policy argument to constructors
- closes pandas-dev#9216, allow passing of dict with view directly to the API
- closes pandas-dev#5902
1 parent 81149fb commit c5396f6

File tree

7 files changed

+163
-55
lines changed

7 files changed

+163
-55
lines changed

pandas/core/config_init.py

+22-9
Original file line number | Diff line number | Diff line change
@@ -472,21 +472,15 @@ def use_inf_as_na_cb(key):
472472
)
473473

474474

475-
# user warnings
475+
#
476+
# options from the "mode" namespace
477+
476478
chained_assignment = """
477479
: string
478480
Raise an exception, warn, or no action if trying to use chained assignment,
479481
The default is warn
480482
"""
481483

482-
with cf.config_prefix("mode"):
483-
cf.register_option(
484-
"chained_assignment",
485-
"warn",
486-
chained_assignment,
487-
validator=is_one_of_factory([None, "warn", "raise"]),
488-
)
489-
490484

491485
# Set up the io.excel specific reader configuration.
492486
reader_engine_doc = """
@@ -500,6 +494,25 @@ def use_inf_as_na_cb(key):
500494
_xlsx_options = ["xlrd", "openpyxl"]
501495
_ods_options = ["odf"]
502496
_xlsb_options = ["pyxlsb"]
497+
policy = """
498+
: string
499+
Default policy for construction of objects,
500+
The default is 'block'
501+
"""
502+
503+
with cf.config_prefix("mode"):
504+
cf.register_option(
505+
"chained_assignment",
506+
"warn",
507+
chained_assignment,
508+
validator=is_one_of_factory([None, "warn", "raise"]),
509+
)
510+
cf.register_option(
511+
"policy",
512+
"block",
513+
policy,
514+
validator=is_one_of_factory(["block", "column", "split"]),
515+
)
503516

504517

505518
with cf.config_prefix("io.excel.xls"):

pandas/core/frame.py

+22-11
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,12 @@ class DataFrame(NDFrame):
355355
Data type to force. Only a single dtype is allowed. If None, infer.
356356
copy : bool, default False
357357
Copy data from inputs. Only affects DataFrame / 2d ndarray input.
358+
policy : string, default None
359+
Provide consolidation policy
360+
- None : use default policy
361+
- block : consolidate into blocks by dtype
362+
- column : don't consolidate, but don't split blocks
363+
- split : don't consolidate, force splitting of input
358364
359365
See Also
360366
--------
@@ -363,6 +369,9 @@ class DataFrame(NDFrame):
363369
read_csv : Read a comma-separated values (csv) file into DataFrame.
364370
read_table : Read general delimited file into DataFrame.
365371
read_clipboard : Read text from clipboard into DataFrame.
372+
Data type to force. Only a single dtype is allowed. If None, infer
373+
copy : boolean, default False
374+
Copy data from inputs. Only affects DataFrame / 2d ndarray input
366375
367376
Examples
368377
--------
@@ -426,6 +435,7 @@ def __init__(
426435
columns: Optional[Axes] = None,
427436
dtype: Optional[Dtype] = None,
428437
copy: bool = False,
438+
policy=None,
429439
):
430440
if data is None:
431441
data = {}
@@ -437,10 +447,11 @@ def __init__(
437447

438448
if isinstance(data, BlockManager):
439449
mgr = self._init_mgr(
440-
data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy
450+
data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy,
451+
policy=policy,
441452
)
442453
elif isinstance(data, dict):
443-
mgr = init_dict(data, index, columns, dtype=dtype)
454+
mgr = init_dict(data, index, columns, dtype=dtype, policy=policy)
444455
elif isinstance(data, ma.MaskedArray):
445456
import numpy.ma.mrecords as mrecords
446457

@@ -457,19 +468,19 @@ def __init__(
457468
data[mask] = fill_value
458469
else:
459470
data = data.copy()
460-
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
471+
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy, policy=policy)
461472

462473
elif isinstance(data, (np.ndarray, Series, Index)):
463474
if data.dtype.names:
464475
data_columns = list(data.dtype.names)
465476
data = {k: data[k] for k in data_columns}
466477
if columns is None:
467478
columns = data_columns
468-
mgr = init_dict(data, index, columns, dtype=dtype)
479+
mgr = init_dict(data, index, columns, dtype=dtype, policy=policy)
469480
elif getattr(data, "name", None) is not None:
470-
mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
481+
mgr = init_dict({data.name: data}, index, columns, dtype=dtype, policy=policy)
471482
else:
472-
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
483+
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy, policy=policy)
473484

474485
# For data is list-like, or Iterable (will consume into list)
475486
elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)):
@@ -493,11 +504,11 @@ def __init__(
493504
else:
494505
index = ibase.default_index(len(data))
495506

496-
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
507+
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype, policy=policy)
497508
else:
498-
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
509+
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy, policy=policy)
499510
else:
500-
mgr = init_dict({}, index, columns, dtype=dtype)
511+
mgr = init_dict({}, index, columns, dtype=dtype, policy=policy)
501512
else:
502513
try:
503514
arr = np.array(data, dtype=dtype, copy=copy)
@@ -513,7 +524,7 @@ def __init__(
513524
(len(index), len(columns)), data, dtype=dtype
514525
)
515526
mgr = init_ndarray(
516-
values, index, columns, dtype=values.dtype, copy=False
527+
values, index, columns, dtype=values.dtype, copy=False, policy=policy,
517528
)
518529
else:
519530
raise ValueError("DataFrame constructor not properly called!")
@@ -575,7 +586,7 @@ def _is_homogeneous_type(self) -> bool:
575586
Index._is_homogeneous_type : Whether the object has a single
576587
dtype.
577588
MultiIndex._is_homogeneous_type : Whether all the levels of a
578-
MultiIndex have the same dtype.
589+
have the same dtype.
579590
580591
Examples
581592
--------

pandas/core/generic.py

+15-1
Original file line number | Diff line number | Diff line change
@@ -249,6 +249,19 @@ def attrs(self) -> Dict[Optional[Hashable], Any]:
249249
def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None:
250250
self._attrs = dict(value)
251251

252+
@property
253+
def _policy(self):
254+
""" return my policy for internal implementation """
255+
return self._data.policy
256+
257+
@_policy.setter
258+
def _policy(self, value):
259+
"""
260+
set my policy for internal implementation
261+
should only set the property for state purposes
262+
"""
263+
self._data.policy = value
264+
252265
@classmethod
253266
def _validate_dtype(cls, dtype):
254267
""" validate the passed dtype """
@@ -1822,6 +1835,7 @@ def __getstate__(self) -> Dict[str, Any]:
18221835
_typ=self._typ,
18231836
_metadata=self._metadata,
18241837
attrs=self.attrs,
1838+
_policy=self._policy,
18251839
**meta,
18261840
)
18271841

@@ -5682,7 +5696,7 @@ def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
56825696
dtype: object
56835697
"""
56845698
data = self._data.copy(deep=deep)
5685-
return self._constructor(data).__finalize__(self)
5699+
return self._constructor(data, policy=self._policy).__finalize__(self)
56865700

56875701
def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
56885702
return self.copy(deep=deep)

pandas/core/internals/blocks.py

+4
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,10 @@ def getitem_block(self, slicer, new_mgr_locs=None):
314314

315315
return self.make_block_same_class(new_values, new_mgr_locs)
316316

317+
@property
318+
def base(self):
319+
return self.values.base
320+
317321
@property
318322
def shape(self):
319323
return self.values.shape

pandas/core/internals/construction.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@
5353
# BlockManager Interface
5454

5555

56-
def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
56+
def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, policy=None):
5757
"""
5858
Segregate Series based on type and coerce into matrices.
5959
@@ -71,7 +71,7 @@ def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
7171
# from BlockManager perspective
7272
axes = [ensure_index(columns), index]
7373

74-
return create_block_manager_from_arrays(arrays, arr_names, axes)
74+
return create_block_manager_from_arrays(arrays, arr_names, axes, policy=policy)
7575

7676

7777
def masked_rec_array_to_mgr(data, index, columns, dtype, copy: bool):
@@ -209,7 +209,7 @@ def init_ndarray(values, index, columns, dtype=None, copy=False):
209209
return create_block_manager_from_blocks(block_values, [columns, index])
210210

211211

212-
def init_dict(data, index, columns, dtype=None):
212+
def init_dict(data, index, columns, dtype=None, policy=None):
213213
"""
214214
Segregate Series based on type and coerce into matrices.
215215
Needs to handle a lot of exceptional cases.
@@ -250,7 +250,7 @@ def init_dict(data, index, columns, dtype=None):
250250
arrays = [
251251
arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
252252
]
253-
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
253+
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, policy=policy)
254254

255255

256256
# ---------------------------------------------------------------------

0 commit comments

Comments
 (0)