Commit d82196c

API/PERF: add policy argument to constructors, pandas-dev#10556
- closes pandas-dev#10556, add policy argument to constructors
- closes pandas-dev#9216, allow passing of dict with view directly to the API
- closes pandas-dev#5902
- closes pandas-dev#8571 by defining __copy__/__deepcopy__
1 parent 99bf170 commit d82196c
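
For orientation, a rough sketch of the user-facing surface this commit adds, assuming this branch of pandas (keyword names and behavior taken from the diffs below):

    import copy
    import numpy as np
    import pandas as pd

    # new 'policy' keyword on the constructors selects a consolidation policy per object
    df = pd.DataFrame({'a': np.arange(3), 'b': np.arange(3.)}, policy='column')

    # pandas-dev#8571: copy.copy / copy.deepcopy now delegate to NDFrame.copy
    df2 = copy.copy(df)
    df3 = copy.deepcopy(df)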

12 files changed, +601 -244 lines changed

pandas/core/config_init.py (+11 -1)
@@ -333,16 +333,26 @@ def use_inf_as_null_cb(key):
                        cb=use_inf_as_null_cb)
 
 
-# user warnings
+#
+# options from the "mode" namespace
+
 chained_assignment = """
 : string
     Raise an exception, warn, or no action if trying to use chained assignment,
     The default is warn
 """
 
+policy = """
+: string
+    Default policy for construction of objects,
+    The default is 'block'
+"""
+
 with cf.config_prefix('mode'):
     cf.register_option('chained_assignment', 'warn', chained_assignment,
                        validator=is_one_of_factory([None, 'warn', 'raise']))
+    cf.register_option('policy', 'block', policy,
+                       validator=is_one_of_factory(['block', 'column', 'split']))
 
 
 # Set up the io.excel specific configuration.
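
From the public config API, the option registered above behaves like any other 'mode' option; a minimal sketch assuming exactly the name and values in this diff:

    import pandas as pd

    pd.get_option('mode.policy')             # 'block', the registered default
    pd.set_option('mode.policy', 'column')   # accepted by the is_one_of_factory validator
    pd.set_option('mode.policy', 'dense')    # rejected: not one of ['block', 'column', 'split']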

pandas/core/frame.py (+22 -16)
@@ -182,6 +182,12 @@ class DataFrame(NDFrame):
         Data type to force, otherwise infer
     copy : boolean, default False
         Copy data from inputs. Only affects DataFrame / 2d ndarray input
+    policy : string, default None
+        Provide consolidation policy
+        - None : use default policy
+        - block : consolidate into blocks by dtype
+        - column : don't consolidate, but don't split blocks
+        - split : don't consolidate, force splitting of input
 
     Examples
     --------
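
The three documented values steer how the constructor lays out the internal BlockManager; a hedged illustration (block counts are an internal detail, and the comments state the documented intent rather than a guarantee):

    import numpy as np
    import pandas as pd

    data = {'i': np.arange(4), 'f': np.arange(4.), 'g': np.arange(4.)}

    pd.DataFrame(data, policy='block')._data.nblocks   # intent: consolidate by dtype -> 2 blocks
    pd.DataFrame(data, policy='column')._data.nblocks  # intent: keep the 3 input columns as passed
    pd.DataFrame(data, policy='split')._data.nblocks   # intent: force one block per column -> 3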
@@ -211,7 +217,7 @@ def _constructor_expanddim(self):
         return Panel
 
     def __init__(self, data=None, index=None, columns=None, dtype=None,
-                 copy=False):
+                 copy=False, policy=None):
         if data is None:
             data = {}
         if dtype is not None:
@@ -222,9 +228,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
 
         if isinstance(data, BlockManager):
             mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
-                                 dtype=dtype, copy=copy)
+                                 dtype=dtype, copy=copy, policy=policy)
         elif isinstance(data, dict):
-            mgr = self._init_dict(data, index, columns, dtype=dtype)
+            mgr = self._init_dict(data, index, columns, dtype=dtype, policy=policy)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
             # masked recarray
@@ -241,7 +247,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             else:
                 data = data.copy()
             mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                     copy=copy)
+                                     copy=copy, policy=policy)
 
         elif isinstance(data, (np.ndarray, Series, Index)):
             if data.dtype.names:
@@ -252,10 +258,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 mgr = self._init_dict(data, index, columns, dtype=dtype)
             elif getattr(data, 'name', None):
                 mgr = self._init_dict({data.name: data}, index, columns,
-                                      dtype=dtype)
+                                      dtype=dtype, policy=policy)
             else:
                 mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                         copy=copy)
+                                         copy=copy, policy=policy)
         elif isinstance(data, (list, types.GeneratorType)):
             if isinstance(data, types.GeneratorType):
                 data = list(data)
@@ -274,12 +280,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                         index = _default_index(len(data))
 
                     mgr = _arrays_to_mgr(arrays, columns, index, columns,
-                                         dtype=dtype)
+                                         dtype=dtype, policy=policy)
                 else:
                     mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                             copy=copy)
+                                             copy=copy, policy=policy)
             else:
-                mgr = self._init_dict({}, index, columns, dtype=dtype)
+                mgr = self._init_dict({}, index, columns, dtype=dtype, policy=policy)
         elif isinstance(data, collections.Iterator):
             raise TypeError("data argument can't be an iterator")
         else:
@@ -299,13 +305,13 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 values = np.empty((len(index), len(columns)), dtype=dtype)
                 values.fill(data)
                 mgr = self._init_ndarray(values, index, columns, dtype=dtype,
-                                         copy=False)
+                                         copy=False, policy=policy)
             else:
                 raise PandasError('DataFrame constructor not properly called!')
 
         NDFrame.__init__(self, mgr, fastpath=True)
 
-    def _init_dict(self, data, index, columns, dtype=None):
+    def _init_dict(self, data, index, columns, dtype=None, policy=None):
         """
         Segregate Series based on type and coerce into matrices.
         Needs to handle a lot of exceptional cases.
@@ -359,10 +365,10 @@ def _init_dict(self, data, index, columns, dtype=None):
         arrays = [data[k] for k in keys]
 
         return _arrays_to_mgr(arrays, data_names, index, columns,
-                              dtype=dtype)
+                              dtype=dtype, policy=policy)
 
     def _init_ndarray(self, values, index, columns, dtype=None,
-                      copy=False):
+                      copy=False, policy=None):
         # input must be a ndarray, list, Series, index
 
         if isinstance(values, Series):
@@ -433,7 +439,7 @@ def _get_axes(N, K, index=index, columns=columns):
         if dtype is None and is_object_dtype(values):
            values = _possibly_infer_to_datetimelike(values)
 
-        return create_block_manager_from_blocks([values], [columns, index])
+        return create_block_manager_from_blocks([values], [columns, index], policy=policy)
 
     @property
     def axes(self):
@@ -5082,7 +5088,7 @@ def combineMult(self, other):
 
 _EMPTY_SERIES = Series([])
 
-def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
+def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, policy=None):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.
@@ -5099,7 +5105,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
     # from BlockManager perspective
     axes = [_ensure_index(columns), _ensure_index(index)]
 
-    return create_block_manager_from_arrays(arrays, arr_names, axes)
+    return create_block_manager_from_arrays(arrays, arr_names, axes, policy=policy)
 
 
 def extract_index(data):
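
Threading policy down to _arrays_to_mgr / create_block_manager_from_arrays is what lets a dict of existing arrays reach the BlockManager without the consolidation step copying them (pandas-dev#9216). A hedged way to probe this; whether a given column actually aliases the caller's array still depends on dtype handling along the chosen constructor path:

    import numpy as np
    import pandas as pd

    arr = np.arange(5.)
    df = pd.DataFrame({'a': arr}, policy='column')   # skip consolidation for this object

    np.may_share_memory(df['a'].values, arr)         # probe whether the column is still a view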

pandas/core/generic.py (+32 -5)
@@ -106,6 +106,19 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
         object.__setattr__(self, '_data', data)
         object.__setattr__(self, '_item_cache', {})
 
+    @property
+    def _policy(self):
+        """ return my policy for internal implementation """
+        return self._data.policy
+
+    @_policy.setter
+    def _policy(self, value):
+        """
+        set my policy for internal implementation
+        should only set the property for state purposes
+        """
+        self._data.policy = value
+
     def _validate_dtype(self, dtype):
         """ validate the passed dtype """
 
@@ -119,7 +132,7 @@ def _validate_dtype(self, dtype):
                              .format(self.__class__.__name__))
         return dtype
 
-    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
+    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False, policy=None):
         """ passed a manager and a axes dict """
         for a, axe in axes.items():
             if axe is not None:
@@ -778,7 +791,8 @@ def to_dense(self):
     def __getstate__(self):
         meta = dict((k, getattr(self, k, None)) for k in self._metadata)
         return dict(_data=self._data, _typ=self._typ,
-                    _metadata=self._metadata, **meta)
+                    _metadata=self._metadata,
+                    _policy=self._policy, **meta)
 
     def __setstate__(self, state):
 
@@ -1228,8 +1242,15 @@ def _check_is_chained_assignment_possible(self):
         """
         if self._is_view and self._is_cached:
             ref = self._get_cacher()
-            if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t='referant', force=True)
+            if ref is not None:
+
+                # TODO: fix me!
+                # if we are a single block, then we don't need to check anything here
+                # if we are column and are actually a block, maybe be a bit tricky
+                if ref._policy in ['column','split']:
+                    return True
+                if ref._is_mixed_type:
+                    self._check_setitem_copy(stacklevel=4, t='referant', force=True)
             return True
         elif self.is_copy:
             self._check_setitem_copy(stacklevel=4, t='referant')
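
Read literally, the new branch above makes the chained-assignment check bail out early when the referenced parent was built under a non-consolidating policy. Roughly the scenario it touches (a hedged reading of the diff, not a guarantee of which warnings fire):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})   # mixed-type parent, default 'block' policy
    s = df['a']                                          # cached view onto df
    s.iloc[0] = 10                                       # mixed-type ref -> _check_setitem_copy runs

    df2 = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']}, policy='column')
    s2 = df2['a']
    s2.iloc[0] = 10                                      # ref._policy in ['column', 'split'] -> early return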
@@ -2518,6 +2539,12 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
             dtype=dtype, copy=copy, raise_on_error=raise_on_error, **kwargs)
         return self._constructor(mgr).__finalize__(self)
 
+    def __deepcopy__(self, memo={}):
+        return self.copy(deep=True)
+
+    def __copy__(self):
+        return self.copy()
+
     def copy(self, deep=True):
         """
         Make a copy of this object
@@ -2532,7 +2559,7 @@ def copy(self, deep=True):
         copy : type of caller
         """
         data = self._data.copy(deep=deep)
-        return self._constructor(data).__finalize__(self)
+        return self._constructor(data, policy=self._policy).__finalize__(self)
 
     def _convert(self, datetime=False, numeric=False, timedelta=False,
                  coerce=False, copy=True):
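
With copy() now forwarding policy=self._policy and _policy included in __getstate__, the construction policy is intended to survive copies and serialization; a hedged sketch (the matching __setstate__ handling is not shown in this hunk):

    import pickle
    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [1.5, 2.5]}, policy='column')

    df.copy()._policy                        # 'column': copy() passes policy=self._policy
    pickle.loads(pickle.dumps(df))._policy   # '_policy' is written out via __getstate__ above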
