Skip to content

Commit 72e171b

Browse files
jrebackjorisvandenbossche
authored and committed
API/PERF: add policy argument to constructors, pandas-dev#10556
- closes pandas-dev#10556, add policy argument to constructors - closes pandas-dev#9216, allow passing of dict with view directly to the API - closes pandas-dev#5902 - closes pandas-dev#8571 by defining __copy__/__deepcopy__
1 parent 0477880 commit 72e171b

File tree

7 files changed

+201
-87
lines changed

7 files changed

+201
-87
lines changed

pandas/core/config_init.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -407,16 +407,26 @@ def use_inf_as_na_cb(key):
407407
rkey='mode.use_inf_as_na')
408408

409409

410-
# user warnings
410+
#
411+
# options from the "mode" namespace
412+
411413
chained_assignment = """
412414
: string
413415
Raise an exception, warn, or no action if trying to use chained assignment,
414416
The default is warn
415417
"""
416418

419+
policy = """
420+
: string
421+
Default policy for construction of objects,
422+
The default is 'block'
423+
"""
424+
417425
with cf.config_prefix('mode'):
418426
cf.register_option('chained_assignment', 'warn', chained_assignment,
419427
validator=is_one_of_factory([None, 'warn', 'raise']))
428+
cf.register_option('policy', 'block', policy,
429+
validator=is_one_of_factory(['block', 'column', 'split']))
420430

421431
# Set up the io.excel specific configuration.
422432
writer_engine_doc = """

pandas/core/frame.py

+25-16
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,12 @@ class DataFrame(NDFrame):
266266
Data type to force. Only a single dtype is allowed. If None, infer
267267
copy : boolean, default False
268268
Copy data from inputs. Only affects DataFrame / 2d ndarray input
269+
policy : string, default None
270+
Provide consolidation policy
271+
- None : use default policy
272+
- block : consolidate into blocks by dtype
273+
- column : don't consolidate, but don't split blocks
274+
- split : don't consolidate, force splitting of input
269275
270276
Examples
271277
--------
@@ -327,7 +333,7 @@ def _constructor_expanddim(self):
327333
return Panel
328334

329335
def __init__(self, data=None, index=None, columns=None, dtype=None,
330-
copy=False):
336+
copy=False, policy=None):
331337
if data is None:
332338
data = {}
333339
if dtype is not None:
@@ -338,9 +344,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
338344

339345
if isinstance(data, BlockManager):
340346
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
341-
dtype=dtype, copy=copy)
347+
dtype=dtype, copy=copy, policy=policy)
342348
elif isinstance(data, dict):
343-
mgr = self._init_dict(data, index, columns, dtype=dtype)
349+
mgr = self._init_dict(data, index, columns, dtype=dtype, policy=policy)
344350
elif isinstance(data, ma.MaskedArray):
345351
import numpy.ma.mrecords as mrecords
346352
# masked recarray
@@ -357,7 +363,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
357363
else:
358364
data = data.copy()
359365
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
360-
copy=copy)
366+
copy=copy, policy=policy)
361367

362368
elif isinstance(data, (np.ndarray, Series, Index)):
363369
if data.dtype.names:
@@ -368,10 +374,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
368374
mgr = self._init_dict(data, index, columns, dtype=dtype)
369375
elif getattr(data, 'name', None) is not None:
370376
mgr = self._init_dict({data.name: data}, index, columns,
371-
dtype=dtype)
377+
dtype=dtype, policy=policy)
372378
else:
373379
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
374-
copy=copy)
380+
copy=copy, policy=policy)
375381
elif isinstance(data, (list, types.GeneratorType)):
376382
if isinstance(data, types.GeneratorType):
377383
data = list(data)
@@ -392,12 +398,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
392398
index = _default_index(len(data))
393399

394400
mgr = _arrays_to_mgr(arrays, columns, index, columns,
395-
dtype=dtype)
401+
dtype=dtype, policy=policy)
396402
else:
397403
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
398-
copy=copy)
404+
copy=copy, policy=policy)
399405
else:
400-
mgr = self._init_dict({}, index, columns, dtype=dtype)
406+
mgr = self._init_dict({}, index, columns, dtype=dtype, policy=policy)
401407
elif isinstance(data, collections.Iterator):
402408
raise TypeError("data argument can't be an iterator")
403409
else:
@@ -412,13 +418,14 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
412418
values = cast_scalar_to_array((len(index), len(columns)),
413419
data, dtype=dtype)
414420
mgr = self._init_ndarray(values, index, columns,
415-
dtype=values.dtype, copy=False)
421+
dtype=values.dtype, copy=False,
422+
policy=policy)
416423
else:
417424
raise ValueError('DataFrame constructor not properly called!')
418425

419426
NDFrame.__init__(self, mgr, fastpath=True)
420427

421-
def _init_dict(self, data, index, columns, dtype=None):
428+
def _init_dict(self, data, index, columns, dtype=None, policy=None):
422429
"""
423430
Segregate Series based on type and coerce into matrices.
424431
Needs to handle a lot of exceptional cases.
@@ -470,9 +477,11 @@ def _init_dict(self, data, index, columns, dtype=None):
470477
columns = data_names = Index(keys)
471478
arrays = [data[k] for k in keys]
472479

473-
return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
480+
return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype,
481+
policy=policy)
474482

475-
def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
483+
def _init_ndarray(self, values, index, columns, dtype=None, copy=False,
484+
policy=None):
476485
# input must be a ndarray, list, Series, index
477486

478487
if isinstance(values, Series):
@@ -542,7 +551,7 @@ def _get_axes(N, K, index=index, columns=columns):
542551
if dtype is None and is_object_dtype(values):
543552
values = maybe_infer_to_datetimelike(values)
544553

545-
return create_block_manager_from_blocks([values], [columns, index])
554+
return create_block_manager_from_blocks([values], [columns, index], policy=policy)
546555

547556
@property
548557
def axes(self):
@@ -6018,7 +6027,7 @@ def isin(self, values):
60186027
ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs)
60196028

60206029

6021-
def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
6030+
def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, policy=None):
60226031
"""
60236032
Segregate Series based on type and coerce into matrices.
60246033
Needs to handle a lot of exceptional cases.
@@ -6035,7 +6044,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
60356044
# from BlockManager perspective
60366045
axes = [_ensure_index(columns), _ensure_index(index)]
60376046

6038-
return create_block_manager_from_arrays(arrays, arr_names, axes)
6047+
return create_block_manager_from_arrays(arrays, arr_names, axes, policy=policy)
60396048

60406049

60416050
def extract_index(data):

pandas/core/generic.py

+31-6
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,18 @@ def _repr_data_resource_(self):
159159
object_pairs_hook=collections.OrderedDict)
160160
return payload
161161

162+
def _policy(self):
163+
""" return my policy for internal implementation """
164+
return self._data.policy
165+
166+
@_policy.setter
167+
def _policy(self, value):
168+
"""
169+
set my policy for internal implementation
170+
should only set the property for state purposes
171+
"""
172+
self._data.policy = value
173+
162174
def _validate_dtype(self, dtype):
163175
""" validate the passed dtype """
164176

@@ -173,7 +185,7 @@ def _validate_dtype(self, dtype):
173185

174186
return dtype
175187

176-
def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
188+
def _init_mgr(self, mgr, axes=None, dtype=None, copy=False, policy=None):
177189
""" passed a manager and a axes dict """
178190
for a, axe in axes.items():
179191
if axe is not None:
@@ -1515,7 +1527,7 @@ def to_dense(self):
15151527
def __getstate__(self):
15161528
meta = {k: getattr(self, k, None) for k in self._metadata}
15171529
return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata,
1518-
**meta)
1530+
_policy=self._policy, **meta)
15191531

15201532
def __setstate__(self, state):
15211533

@@ -2310,9 +2322,16 @@ def _check_is_chained_assignment_possible(self):
23102322
"""
23112323
if self._is_view and self._is_cached:
23122324
ref = self._get_cacher()
2313-
if ref is not None and ref._is_mixed_type:
2314-
self._check_setitem_copy(stacklevel=4, t='referant',
2315-
force=True)
2325+
if ref is not None:
2326+
2327+
# TODO: fix me!
2328+
# if we are a single block, then we don't need to check anything here
2329+
# if we are column and are actually a block, may be a bit tricky
2330+
if ref._policy in ['column','split']:
2331+
return True
2332+
if ref._is_mixed_type:
2333+
self._check_setitem_copy(stacklevel=4, t='referant',
2334+
force=True)
23162335
return True
23172336
elif self._is_copy:
23182337
self._check_setitem_copy(stacklevel=4, t='referant')
@@ -4416,6 +4435,12 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
44164435
**kwargs)
44174436
return self._constructor(new_data).__finalize__(self)
44184437

4438+
def __deepcopy__(self, memo={}):
4439+
return self.copy(deep=True)
4440+
4441+
def __copy__(self):
4442+
return self.copy()
4443+
44194444
def copy(self, deep=True):
44204445
"""
44214446
Make a copy of this objects data.
@@ -4436,7 +4461,7 @@ def copy(self, deep=True):
44364461
copy : type of caller
44374462
"""
44384463
data = self._data.copy(deep=deep)
4439-
return self._constructor(data).__finalize__(self)
4464+
return self._constructor(data, policy=self._policy).__finalize__(self)
44404465

44414466
def __copy__(self, deep=True):
44424467
return self.copy(deep=deep)

0 commit comments

Comments
 (0)