Commit 069207f

jreback authored and jorisvandenbossche committed
API/PERF: add policy argument to constructors, pandas-dev#10556
- closes pandas-dev#10556, add policy argument to constructors
- closes pandas-dev#9216, allow passing of dict with view directly to the API
- closes pandas-dev#5902
1 parent 0477880 commit 069207f
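
As a hedged usage sketch (assuming this commit is applied; the behavior is only what the constructor docstring added below describes, and `policy=None` presumably falls back to the configured default), the new keyword would let a caller opt out of block consolidation at construction time:

import numpy as np
import pandas as pd

data = {'a': np.arange(5), 'b': np.arange(5.0), 'c': np.arange(5.0)}

# default behavior (policy=None -> the configured default, 'block'):
# like-typed columns are consolidated into shared 2-D blocks
df_default = pd.DataFrame(data)

# opt out of consolidation for this object only
df_column = pd.DataFrame(data, policy='column')

# force every input column into its own block
df_split = pd.DataFrame(data, policy='split')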

File tree

7 files changed: +206 -87 lines

pandas/core/config_init.py (+11 -1)

@@ -407,16 +407,26 @@ def use_inf_as_na_cb(key):
                         rkey='mode.use_inf_as_na')
 
 
-# user warnings
+#
+# options from the "mode" namespace
+
 chained_assignment = """
 : string
     Raise an exception, warn, or no action if trying to use chained assignment,
     The default is warn
 """
 
+policy = """
+: string
+    Default policy for construction of objects,
+    The default is 'block'
+"""
+
 with cf.config_prefix('mode'):
     cf.register_option('chained_assignment', 'warn', chained_assignment,
                        validator=is_one_of_factory([None, 'warn', 'raise']))
+    cf.register_option('policy', 'block', policy,
+                       validator=is_one_of_factory(['block', 'column', 'split']))
 
 # Set up the io.excel specific configuration.
 writer_engine_doc = """
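A usage sketch for the newly registered option (assuming the commit is applied; `mode.policy` is the option added above, read and written through pandas' standard options machinery):

import pandas as pd

# read the current default construction policy
print(pd.get_option('mode.policy'))        # 'block'

# switch the default to column-wise (non-consolidating) construction
pd.set_option('mode.policy', 'column')

# invalid values are rejected by the is_one_of_factory validator
try:
    pd.set_option('mode.policy', 'rows')
except ValueError as err:
    print(err)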

pandas/core/frame.py (+29 -16)

@@ -266,6 +266,12 @@ class DataFrame(NDFrame):
         Data type to force. Only a single dtype is allowed. If None, infer
     copy : boolean, default False
         Copy data from inputs. Only affects DataFrame / 2d ndarray input
+    policy : string, default None
+        Provide consolidation policy
+        - None : use default policy
+        - block : consolidate into blocks by dtype
+        - column : don't consolidate, but don't split blocks
+        - split : don't consolidate, force splitting of input
 
     Examples
     --------
@@ -327,7 +333,7 @@ def _constructor_expanddim(self):
         return Panel
 
     def __init__(self, data=None, index=None, columns=None, dtype=None,
-                 copy=False):
+                 copy=False, policy=None):
         if data is None:
             data = {}
         if dtype is not None:
@@ -338,9 +344,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
 
         if isinstance(data, BlockManager):
             mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
-                                 dtype=dtype, copy=copy)
+                                 dtype=dtype, copy=copy, policy=policy)
         elif isinstance(data, dict):
-            mgr = self._init_dict(data, index, columns, dtype=dtype)
+            mgr = self._init_dict(data, index, columns, dtype=dtype,
+                                  policy=policy)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
             # masked recarray
@@ -357,7 +364,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             else:
                 data = data.copy()
             mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                     copy=copy)
+                                     copy=copy, policy=policy)
 
         elif isinstance(data, (np.ndarray, Series, Index)):
             if data.dtype.names:
@@ -368,10 +375,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 mgr = self._init_dict(data, index, columns, dtype=dtype)
             elif getattr(data, 'name', None) is not None:
                 mgr = self._init_dict({data.name: data}, index, columns,
-                                      dtype=dtype)
+                                      dtype=dtype, policy=policy)
             else:
                 mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                         copy=copy)
+                                         copy=copy, policy=policy)
         elif isinstance(data, (list, types.GeneratorType)):
             if isinstance(data, types.GeneratorType):
                 data = list(data)
@@ -392,12 +399,13 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                             index = _default_index(len(data))
 
                     mgr = _arrays_to_mgr(arrays, columns, index, columns,
-                                         dtype=dtype)
+                                         dtype=dtype, policy=policy)
                 else:
                     mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                             copy=copy)
+                                             copy=copy, policy=policy)
             else:
-                mgr = self._init_dict({}, index, columns, dtype=dtype)
+                mgr = self._init_dict({}, index, columns, dtype=dtype,
+                                      policy=policy)
         elif isinstance(data, collections.Iterator):
             raise TypeError("data argument can't be an iterator")
         else:
@@ -412,13 +420,14 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 values = cast_scalar_to_array((len(index), len(columns)),
                                               data, dtype=dtype)
                 mgr = self._init_ndarray(values, index, columns,
-                                         dtype=values.dtype, copy=False)
+                                         dtype=values.dtype, copy=False,
+                                         policy=policy)
             else:
                 raise ValueError('DataFrame constructor not properly called!')
 
         NDFrame.__init__(self, mgr, fastpath=True)
 
-    def _init_dict(self, data, index, columns, dtype=None):
+    def _init_dict(self, data, index, columns, dtype=None, policy=None):
         """
         Segregate Series based on type and coerce into matrices.
         Needs to handle a lot of exceptional cases.
@@ -470,9 +479,11 @@ def _init_dict(self, data, index, columns, dtype=None):
             columns = data_names = Index(keys)
             arrays = [data[k] for k in keys]
 
-        return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
+        return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype,
+                              policy=policy)
 
-    def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
+    def _init_ndarray(self, values, index, columns, dtype=None, copy=False,
+                      policy=None):
         # input must be a ndarray, list, Series, index
 
         if isinstance(values, Series):
@@ -542,7 +553,8 @@ def _get_axes(N, K, index=index, columns=columns):
         if dtype is None and is_object_dtype(values):
             values = maybe_infer_to_datetimelike(values)
 
-        return create_block_manager_from_blocks([values], [columns, index])
+        return create_block_manager_from_blocks([values], [columns, index],
+                                                policy=policy)
 
     @property
     def axes(self):
@@ -6018,7 +6030,7 @@ def isin(self, values):
 ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs)
 
 
-def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
+def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, policy=None):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.
@@ -6035,7 +6047,8 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
     # from BlockManager perspective
     axes = [_ensure_index(columns), _ensure_index(index)]
 
-    return create_block_manager_from_arrays(arrays, arr_names, axes)
+    return create_block_manager_from_arrays(arrays, arr_names, axes,
+                                            policy=policy)
 
 
 def extract_index(data):
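
An illustrative sketch of what these frame.py changes aim at (not part of the commit): the policy is threaded from the constructor through `_init_dict`/`_arrays_to_mgr` down to the block-manager factories, so different policies should yield different internal block layouts. The `_data` BlockManager is pandas-private, and the exact block counts below are assumptions about how 'block' versus 'split' are intended to behave:

import numpy as np
import pandas as pd

data = {'a': np.arange(3), 'b': np.arange(3.0), 'c': np.arange(3.0)}

# 'block': float columns 'b' and 'c' would be consolidated into one
# 2-D float64 block, plus one int64 block for 'a'
df_block = pd.DataFrame(data, policy='block')

# 'split': every input column would be kept as its own block, so the
# dict values can be referenced as views instead of being copied
df_split = pd.DataFrame(data, policy='split')

# inspect the internal layout via the (private) BlockManager
print(len(df_block._data.blocks))   # expected: 2 under 'block'
print(len(df_split._data.blocks))   # expected: 3 under 'split'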

pandas/core/generic.py (+27 -6)

@@ -159,6 +159,19 @@ def _repr_data_resource_(self):
                                  object_pairs_hook=collections.OrderedDict)
         return payload
 
+    @property
+    def _policy(self):
+        """ return my policy for internal implementation """
+        return self._data.policy
+
+    @_policy.setter
+    def _policy(self, value):
+        """
+        set my policy for internal implementation
+        should only set the property for state purposes
+        """
+        self._data.policy = value
+
     def _validate_dtype(self, dtype):
         """ validate the passed dtype """
 
@@ -173,7 +186,7 @@ def _validate_dtype(self, dtype):
 
         return dtype
 
-    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
+    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False, policy=None):
         """ passed a manager and a axes dict """
         for a, axe in axes.items():
             if axe is not None:
@@ -1515,7 +1528,7 @@ def to_dense(self):
     def __getstate__(self):
         meta = {k: getattr(self, k, None) for k in self._metadata}
         return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata,
-                    **meta)
+                    _policy=self._policy, **meta)
 
     def __setstate__(self, state):
 
@@ -2310,9 +2323,17 @@ def _check_is_chained_assignment_possible(self):
         """
         if self._is_view and self._is_cached:
             ref = self._get_cacher()
-            if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t='referant',
-                                         force=True)
+            if ref is not None:
+
+                # TODO: fix me!
+                # if we are a single block, then we don't need to check
+                # anything here if we are column and are actually a block,
+                # maybe be a bit tricky
+                if ref._policy in ['column', 'split']:
+                    return True
+                if ref._is_mixed_type:
+                    self._check_setitem_copy(stacklevel=4, t='referant',
+                                             force=True)
             return True
         elif self._is_copy:
             self._check_setitem_copy(stacklevel=4, t='referant')
@@ -4436,7 +4457,7 @@ def copy(self, deep=True):
         copy : type of caller
         """
         data = self._data.copy(deep=deep)
-        return self._constructor(data).__finalize__(self)
+        return self._constructor(data, policy=self._policy).__finalize__(self)
 
     def __copy__(self, deep=True):
         return self.copy(deep=deep)