59
59
60
60
from pandas .core .index import Index , MultiIndex , _ensure_index
61
61
from pandas .core .indexing import maybe_convert_indices , length_of_indexer
62
- from pandas .core .arrays . categorical import Categorical , _maybe_to_categorical
62
+ from pandas .core .arrays import Categorical , ExtensionArray
63
63
from pandas .core .indexes .datetimes import DatetimeIndex
64
64
from pandas .io .formats .printing import pprint_thing
65
65
@@ -95,6 +95,7 @@ class Block(PandasObject):
95
95
is_object = False
96
96
is_categorical = False
97
97
is_sparse = False
98
+ is_extension = False
98
99
_box_to_block_values = True
99
100
_can_hold_na = False
100
101
_downcast_dtype = None
@@ -108,14 +109,14 @@ class Block(PandasObject):
108
109
def __init__(self, values, placement, ndim=None, fastpath=False):
    """Set up the block's values, placement and dimensionality.

    Parameters
    ----------
    values : array-like
        Data held by the block; must expose ``ndim`` and support ``len``.
    placement : object
        Assigned to ``self.mgr_locs``.
        NOTE(review): presumably converted by a ``mgr_locs`` property
        setter defined outside this view - confirm.
    ndim : int, optional
        Number of dimensions. Taken from ``values.ndim`` when None.
    fastpath : bool, default False
        Accepted for signature compatibility; not used in this method.

    Raises
    ------
    ValueError
        If validation is enabled (``self._validate_ndim``) and either
        ``values.ndim`` differs from ``ndim`` or the placement length
        differs from the length of the values.
    """
    if ndim is None:
        ndim = values.ndim
    elif self._validate_ndim:
        # Dimensionality is only enforced for classes that opt in.
        if values.ndim != ndim:
            raise ValueError('Wrong number of dimensions')
    self.ndim = ndim

    self.mgr_locs = placement
    self.values = values

    if self._validate_ndim and ndim:
        if len(self.mgr_locs) != len(self.values):
            message = ('Wrong number of items passed {val}, placement '
                       'implies {mgr}')
            raise ValueError(message.format(val=len(self.values),
                                            mgr=len(self.mgr_locs)))
@@ -274,7 +275,6 @@ def reshape_nd(self, labels, shape, ref_items, mgr=None):
274
275
275
276
return a new block that is transformed to a nd block
276
277
"""
277
-
278
278
return _block2d_to_blocknd (values = self .get_values ().T ,
279
279
placement = self .mgr_locs , shape = shape ,
280
280
labels = labels , ref_items = ref_items )
@@ -1697,24 +1697,19 @@ class NonConsolidatableMixIn(object):
1697
1697
_holder = None
1698
1698
1699
1699
def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
    """Normalise ``placement``, infer ``ndim`` if needed, then delegate.

    Parameters
    ----------
    values : object
        Forwarded unchanged to the parent ``__init__``.
    placement : BlockPlacement or object accepted by BlockPlacement
        Wrapped in a ``BlockPlacement`` when it is not one already.
    ndim : int, optional
        When None, inferred from the placement: 2 if it has exactly one
        location, otherwise 1.
    fastpath : bool, default False
        Forwarded unchanged to the parent ``__init__``.
    **kwargs
        Accepted but not forwarded.
    """
    # A BlockPlacement is needed up front so that its length can be
    # checked when inferring ndim below.
    placement = (placement if isinstance(placement, BlockPlacement)
                 else BlockPlacement(placement))

    if ndim is None:
        # Maybe infer ndim from placement
        ndim = 2 if len(placement) == 1 else 1

    parent = super(NonConsolidatableMixIn, self)
    parent.__init__(values, placement, ndim=ndim, fastpath=fastpath)
1718
1713
1719
1714
@property
1720
1715
def shape (self ):
@@ -1765,7 +1760,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
1765
1760
1766
1761
Returns
1767
1762
-------
1768
- a new block(s) , the result of the putmask
1763
+ a new block, the result of the putmask
1769
1764
"""
1770
1765
inplace = validate_bool_kwarg (inplace , 'inplace' )
1771
1766
@@ -1823,6 +1818,91 @@ def _unstack(self, unstacker_func, new_columns):
1823
1818
return blocks , mask
1824
1819
1825
1820
1821
class ExtensionBlock(NonConsolidatableMixIn, Block):
    """Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    All extension arrays *must* be 1-D, which simplifies things a bit.
    """
    # Some questions / notes as comments, will be removed.
    #
    # Currently inherited from NCB. We'll keep it around until SparseBlock
    # and DatetimeTZBlock are refactored.
    # - set
    # - iget
    # - should_store
    # - putmask
    # - _slice
    # - _try_cast_result
    # - unstack

    # Think about overriding these methods from Block
    # - _maybe_downcast: (never downcast)

    # Methods we can (probably) ignore and just use Block's:

    # * replace / replace_single
    #   Categorical got Object, but was hopefully unnecessary.
    #   DatetimeTZ, Sparse got Block
    # * is_view
    #   Categorical overrides to say that it is not.
    #   DatetimeTZ, Sparse inherits Base anyway

    is_extension = True

    # XXX
    # is_bool is a change for CategoricalBlock. Used to inherit
    # from Object to infer from values. If this matters, we should
    # override it directly in CategoricalBlock so that we infer from
    # the categories, not the codes.
    is_bool = False

    def __init__(self, values, placement, ndim=None, fastpath=False):
        """Record the concrete array type as ``_holder`` and delegate.

        Parameters
        ----------
        values : object
            The array to hold; its type is stored as ``self._holder``.
        placement, ndim, fastpath
            Forwarded unchanged to the parent ``__init__``.
        """
        self._holder = type(values)
        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim,
                                             fastpath=fastpath)

    def get_values(self, dtype=None):
        """Return the held values as a NumPy array.

        Parameters
        ----------
        dtype : object, optional
            Accepted for signature compatibility; not used here.

        Returns
        -------
        numpy.ndarray
            ``np.asarray(self.values)``, with a leading length-1 axis
            added when the array has one dimension fewer than the block.
        """
        # ExtensionArrays must be iterable, so this works.
        values = np.asarray(self.values)
        if values.ndim == self.ndim - 1:
            values = values.reshape((1,) + values.shape)
        return values

    def _can_hold_element(self, element):
        """Return True unconditionally; no check on ``element`` is made."""
        # XXX:
        # Not defined on NCM.
        # Categorical got True from ObjectBlock
        # DatetimeTZ gets DatetimeBlock
        # Sparse gets Block
        # Let's just assume yes for now, but we can maybe push
        # this onto the array.
        return True

    def convert(self, copy=True, **kwargs):
        """Return this block (or a copy of it) without any conversion.

        Parameters
        ----------
        copy : bool, default True
            When true return ``self.copy()``, otherwise ``self``.
        **kwargs
            Accepted and ignored.
        """
        # We're dedicated to a type, we don't convert.
        # Taken from CategoricalBlock / Block.
        return self.copy() if copy else self

    def _slice(self, slicer):
        """Return a slice of my values.

        Parameters
        ----------
        slicer : object or 2-tuple
            A 2-tuple is accepted only when its first element is a null
            slice; the second element is then used as the slicer.

        Raises
        ------
        AssertionError
            If a 2-tuple is passed whose first element is not a null
            slice.
        """
        # The held array is 1-D, so only the second component of a 2-D
        # slicer can select anything; the result keeps the array's dims.
        if isinstance(slicer, tuple) and len(slicer) == 2:
            if not is_null_slice(slicer[0]):
                # Message names the generic container: this block is not
                # specific to categoricals.
                raise AssertionError("invalid slicing for a 1-ndim "
                                     "extension array")
            slicer = slicer[1]

        return self.values._slice(slicer)
1904
+
1905
+
1826
1906
class NumericBlock (Block ):
1827
1907
__slots__ = ()
1828
1908
is_numeric = True
@@ -2337,7 +2417,7 @@ def re_replacer(s):
2337
2417
return block
2338
2418
2339
2419
2340
- class CategoricalBlock (NonConsolidatableMixIn , ObjectBlock ):
2420
+ class CategoricalBlock (ExtensionBlock ):
2341
2421
__slots__ = ()
2342
2422
is_categorical = True
2343
2423
_verify_integrity = True
@@ -2346,6 +2426,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock):
2346
2426
_concatenator = staticmethod (_concat ._concat_categorical )
2347
2427
2348
2428
def __init__ (self , values , placement , fastpath = False , ** kwargs ):
2429
+ from pandas .core .arrays .categorical import _maybe_to_categorical
2349
2430
2350
2431
# coerce to categorical if we can
2351
2432
super (CategoricalBlock , self ).__init__ (_maybe_to_categorical (values ),
@@ -2360,23 +2441,13 @@ def is_view(self):
2360
2441
def to_dense (self ):
2361
2442
return self .values .to_dense ().view ()
2362
2443
2363
- def convert (self , copy = True , ** kwargs ):
2364
- return self .copy () if copy else self
2365
-
2366
2444
@property
2367
2445
def array_dtype (self ):
2368
2446
""" the dtype to return if I want to construct this block as an
2369
2447
array
2370
2448
"""
2371
2449
return np .object_
2372
2450
2373
- def _slice (self , slicer ):
2374
- """ return a slice of my values """
2375
-
2376
- # slice the category
2377
- # return same dims as we currently have
2378
- return self .values ._slice (slicer )
2379
-
2380
2451
def _try_coerce_result (self , result ):
2381
2452
""" reverse of try_coerce_args """
2382
2453
@@ -2468,7 +2539,8 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block):
2468
2539
_can_hold_na = True
2469
2540
2470
2541
def __init__ (self , values , placement , fastpath = False , ** kwargs ):
2471
- if values .dtype != _NS_DTYPE :
2542
+ if values .dtype != _NS_DTYPE and values .dtype .base != _NS_DTYPE :
2543
+ # not datetime64 or datetime64tz
2472
2544
values = conversion .ensure_datetime64ns (values )
2473
2545
2474
2546
super (DatetimeBlock , self ).__init__ (values , fastpath = True ,
0 commit comments