@@ -1782,13 +1782,13 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
             return self.set_atom_timedelta64(block)

         dtype = block.dtype.name
-        rvalues = block.values.ravel()
-        inferred_type = lib.infer_dtype(rvalues)
+        inferred_type = lib.infer_dtype(block.values)

         if inferred_type == 'date':
             raise TypeError(
                 "[date] is not implemented as a table column")
         elif inferred_type == 'datetime':
+            rvalues = block.values.ravel()
             if getattr(rvalues[0], 'tzinfo', None) is not None:

                 # if this block has more than one timezone, raise
@@ -1917,7 +1917,7 @@ def get_atom_data(self, block, kind=None):
     def set_atom_data(self, block):
         self.kind = block.dtype.name
         self.typ = self.get_atom_data(block)
-        self.set_data(block.values.astype(self.typ.type))
+        self.set_data(block.values.astype(self.typ.type, copy=False))

     def set_atom_categorical(self, block, items, info=None, values=None):
         # currently only supports a 1-D categorical
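
# Illustrative sketch (not part of the patch): the copy=False changes in this
# and later hunks rely on ndarray.astype copying unconditionally by default,
# even when the array already has the requested dtype; with copy=False NumPy
# may hand back the original array in that no-op case.

import numpy as np

a = np.arange(5, dtype='float64')

b = a.astype('float64')                 # default copy=True: always a fresh copy
print(np.shares_memory(a, b))           # False

c = a.astype('float64', copy=False)     # same dtype: the original is returned
print(c is a)                           # True, no allocation, no copy

d = a.astype('int64', copy=False)       # a real cast still has to allocate
print(np.shares_memory(a, d))           # False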
@@ -2016,7 +2016,7 @@ def convert(self, values, nan_rep, encoding):

                     index = DatetimeIndex(
                         self.data.ravel(), tz='UTC').tz_convert(self.tz)
-                    self.data = np.array(
+                    self.data = np.asarray(
                         index.tolist(), dtype=object).reshape(self.data.shape)

                 else:
@@ -2026,14 +2026,14 @@ def convert(self, values, nan_rep, encoding):
                 self.data = np.asarray(self.data, dtype='m8[ns]')
             elif dtype == u('date'):
                 try:
-                    self.data = np.array(
+                    self.data = np.asarray(
                         [date.fromordinal(v) for v in self.data], dtype=object)
                 except ValueError:
-                    self.data = np.array(
+                    self.data = np.asarray(
                         [date.fromtimestamp(v) for v in self.data],
                         dtype=object)
             elif dtype == u('datetime'):
-                self.data = np.array(
+                self.data = np.asarray(
                     [datetime.fromtimestamp(v) for v in self.data],
                     dtype=object)

@@ -2048,9 +2048,9 @@ def convert(self, values, nan_rep, encoding):
             else:

                 try:
-                    self.data = self.data.astype(dtype)
+                    self.data = self.data.astype(dtype, copy=False)
                 except:
-                    self.data = self.data.astype('O')
+                    self.data = self.data.astype('O', copy=False)

         # convert nans / decode
         if _ensure_decoded(self.kind) == u('string'):
@@ -2337,9 +2337,9 @@ def read_array(self, key):
         ret = data

         if dtype == u('datetime64'):
-            ret = np.array(ret, dtype='M8[ns]')
+            ret = np.asarray(ret, dtype='M8[ns]')
         elif dtype == u('timedelta64'):
-            ret = np.array(ret, dtype='m8[ns]')
+            ret = np.asarray(ret, dtype='m8[ns]')

         if transposed:
             return ret.T
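
# Illustrative sketch (not part of the patch): the np.array -> np.asarray
# swaps in these hunks rely on np.array copying its input by default, while
# np.asarray passes an existing ndarray through untouched when the requested
# dtype already matches. For Python-list inputs (as in _convert_index /
# _unconvert_index further down) both calls must allocate a new array, so the
# change there is mainly for consistency.

import numpy as np

raw = np.arange(10, dtype='int64').view('M8[ns]')    # already datetime64[ns]

copied = np.array(raw, dtype='M8[ns]')               # copies by default
viewed = np.asarray(raw, dtype='M8[ns]')             # reuses the same buffer

print(np.shares_memory(raw, copied))                 # False
print(viewed is raw)                                 # True

print(np.asarray([1, 2, 3]).base is None)            # True: a list always allocates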
@@ -3793,7 +3793,7 @@ def write_data(self, chunksize, dropna=True):
                 # figure the mask: only do if we can successfully process this
                 # column, otherwise ignore the mask
                 mask = com.isnull(a.data).all(axis=0)
-                masks.append(mask.astype('u1'))
+                masks.append(mask.astype('u1', copy=False))

             # consolidate masks
             mask = masks[0]
@@ -3803,8 +3803,7 @@ def write_data(self, chunksize, dropna=True):

         else:

-            mask = np.empty(nrows, dtype='u1')
-            mask.fill(False)
+            mask = None

         # broadcast the indexes if needed
         indexes = [a.cvalues for a in self.index_axes]
@@ -3833,12 +3832,13 @@ def write_data(self, chunksize, dropna=True):
         bvalues = []
         for i, v in enumerate(values):
             new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape
-            bvalues.append(values[i].ravel().reshape(new_shape))
+            bvalues.append(values[i].reshape(new_shape))

         # write the chunks
         if chunksize is None:
             chunksize = 100000

+        rows = np.empty(min(chunksize, nrows), dtype=self.dtype)
         chunks = int(nrows / chunksize) + 1
         for i in range(chunks):
             start_i = i * chunksize
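
# Illustrative sketch (not part of the patch) of the buffer reuse introduced
# above: `rows` is sized to min(chunksize, nrows) and allocated once per
# write_data call, and write_data_chunk (next hunks) only reallocates when it
# receives a chunk of a different length, which normally happens once, for the
# final partial chunk. The names below (write_in_chunks, the row dtype) are
# made up for illustration.

import numpy as np

def write_in_chunks(nrows, chunksize, write_chunk):
    dtype = np.dtype([('index', 'i8'), ('values_block_0', 'f8')])
    rows = np.empty(min(chunksize, nrows), dtype=dtype)    # allocated once

    chunks = int(nrows / chunksize) + 1
    for i in range(chunks):
        start_i = i * chunksize
        end_i = min((i + 1) * chunksize, nrows)
        if start_i >= end_i:
            break
        if end_i - start_i != len(rows):    # only the last, shorter chunk reallocates
            rows = np.empty(end_i - start_i, dtype=dtype)
        write_chunk(rows)

write_in_chunks(250000, 100000, lambda buf: print(len(buf)))   # 100000, 100000, 50000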
@@ -3847,11 +3847,20 @@ def write_data(self, chunksize, dropna=True):
                 break

             self.write_data_chunk(
+                rows,
                 indexes=[a[start_i:end_i] for a in bindexes],
-                mask=mask[start_i:end_i],
+                mask=mask[start_i:end_i] if mask is not None else None,
                 values=[v[start_i:end_i] for v in bvalues])

-    def write_data_chunk(self, indexes, mask, values):
+    def write_data_chunk(self, rows, indexes, mask, values):
+        """
+        Parameters
+        ----------
+        rows : an empty memory space where we are putting the chunk
+        indexes : an array of the indexes
+        mask : an array of the masks
+        values : an array of the values
+        """

         # 0 len
         for v in values:
@@ -3860,7 +3869,8 @@ def write_data_chunk(self, indexes, mask, values):

         try:
             nrows = indexes[0].shape[0]
-            rows = np.empty(nrows, dtype=self.dtype)
+            if nrows != len(rows):
+                rows = np.empty(nrows, dtype=self.dtype)
             names = self.dtype.names
             nindexes = len(indexes)

@@ -3873,7 +3883,10 @@ def write_data_chunk(self, indexes, mask, values):
                 rows[names[i + nindexes]] = v

             # mask
-            rows = rows[~mask.ravel().astype(bool)]
+            if mask is not None:
+                m = ~mask.ravel().astype(bool, copy=False)
+                if not m.all():
+                    rows = rows[m]

         except Exception as detail:
             raise Exception("cannot create row-data -> %s" % detail)
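
# Illustrative sketch (not part of the patch) of the mask handling above:
# with dropna disabled the caller now passes mask=None and the boolean
# indexing is skipped entirely, and even with a mask the fancy-indexing copy
# of `rows` only happens when at least one row is actually dropped.
# apply_mask and the row dtype below are made up for illustration.

import numpy as np

def apply_mask(rows, mask):
    if mask is not None:
        m = ~mask.ravel().astype(bool, copy=False)
        if not m.all():           # fancy indexing copies, so only do it if needed
            rows = rows[m]
    return rows

rows = np.zeros(5, dtype=[('index', 'i8'), ('values_block_0', 'f8')])
print(len(apply_mask(rows, None)))                                     # 5
print(len(apply_mask(rows, np.zeros(5, dtype='u1'))))                  # 5 (nothing dropped)
print(len(apply_mask(rows, np.array([1, 0, 0, 1, 0], dtype='u1'))))    # 3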
@@ -4240,14 +4253,14 @@ def _convert_index(index, encoding=None, format_type=None):
                         tz=getattr(index, 'tz', None),
                         index_name=index_name)
     elif inferred_type == 'datetime':
-        converted = np.array([(time.mktime(v.timetuple()) +
-                               v.microsecond / 1E6) for v in values],
-                             dtype=np.float64)
+        converted = np.asarray([(time.mktime(v.timetuple()) +
+                                 v.microsecond / 1E6) for v in values],
+                               dtype=np.float64)
         return IndexCol(converted, 'datetime', _tables().Time64Col(),
                         index_name=index_name)
     elif inferred_type == 'date':
-        converted = np.array([v.toordinal() for v in values],
-                             dtype=np.int32)
+        converted = np.asarray([v.toordinal() for v in values],
+                               dtype=np.int32)
         return IndexCol(converted, 'date', _tables().Time32Col(),
                         index_name=index_name)
     elif inferred_type == 'string':
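
# Illustrative sketch (not part of the patch): a 'datetime'-kind index is
# stored in a Time64Col as float seconds since the epoch (whole seconds from
# time.mktime plus the sub-second part), and _unconvert_index below reverses
# it with datetime.fromtimestamp. The round trip in isolation, using the same
# formulas as the patch:

import time
import numpy as np
from datetime import datetime

values = [datetime(2015, 3, 1, 12, 30, 15, 250000),
          datetime(2015, 3, 2, 8, 0, 0, 500000)]

converted = np.asarray([(time.mktime(v.timetuple()) +
                         v.microsecond / 1E6) for v in values],
                       dtype=np.float64)

restored = np.asarray([datetime.fromtimestamp(v) for v in converted],
                      dtype=object)

print(all(a == b for a, b in zip(values, restored)))   # True (same local timezone)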
@@ -4290,21 +4303,21 @@ def _unconvert_index(data, kind, encoding=None):
     if kind == u('datetime64'):
         index = DatetimeIndex(data)
     elif kind == u('datetime'):
-        index = np.array([datetime.fromtimestamp(v) for v in data],
-                         dtype=object)
+        index = np.asarray([datetime.fromtimestamp(v) for v in data],
+                           dtype=object)
     elif kind == u('date'):
         try:
-            index = np.array(
+            index = np.asarray(
                 [date.fromordinal(v) for v in data], dtype=object)
         except (ValueError):
-            index = np.array(
+            index = np.asarray(
                 [date.fromtimestamp(v) for v in data], dtype=object)
     elif kind in (u('integer'), u('float')):
-        index = np.array(data)
+        index = np.asarray(data)
     elif kind in (u('string')):
         index = _unconvert_string_array(data, nan_rep=None, encoding=encoding)
     elif kind == u('object'):
-        index = np.array(data[0])
+        index = np.asarray(data[0])
     else:  # pragma: no cover
         raise ValueError('unrecognized index type %s' % kind)
     return index
@@ -4315,7 +4328,7 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):
     if kind == u('datetime'):
         index = lib.time64_to_datetime(data)
     elif kind in (u('integer')):
-        index = np.array(data, dtype=object)
+        index = np.asarray(data, dtype=object)
     elif kind in (u('string')):
         index = _unconvert_string_array(data, nan_rep=None, encoding=encoding)
     else:  # pragma: no cover
@@ -4334,13 +4347,13 @@ def _convert_string_array(data, encoding, itemsize=None):
     if itemsize is None:
         itemsize = lib.max_len_string_array(com._ensure_object(data.ravel()))

-    data = np.array(data, dtype="S%d" % itemsize)
+    data = np.asarray(data, dtype="S%d" % itemsize)
     return data

 def _unconvert_string_array(data, nan_rep=None, encoding=None):
     """ deserialize a string array, possibly decoding """
     shape = data.shape
-    data = np.array(data.ravel(), dtype=object)
+    data = np.asarray(data.ravel(), dtype=object)

     # guard against a None encoding in PY3 (because of a legacy
     # where the passed encoding is actually None)
@@ -4353,7 +4366,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
                 dtype = "U{0}".format(itemsize)
             else:
                 dtype = "S{0}".format(itemsize)
-            data = data.astype(dtype).astype(object)
+            data = data.astype(dtype, copy=False).astype(object, copy=False)
         except (Exception) as e:
             f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
             data = f(data)
@@ -4376,7 +4389,7 @@ def _maybe_convert(values, val_kind, encoding):
 def _get_converter(kind, encoding):
     kind = _ensure_decoded(kind)
     if kind == 'datetime64':
-        return lambda x: np.array(x, dtype='M8[ns]')
+        return lambda x: np.asarray(x, dtype='M8[ns]')
     elif kind == 'datetime':
         return lib.convert_timestamps
     elif kind == 'string':
@@ -4421,7 +4434,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
             try:
                 inferred = lib.infer_dtype(where)
                 if inferred == 'integer' or inferred == 'boolean':
-                    where = np.array(where)
+                    where = np.asarray(where)
                     if where.dtype == np.bool_:
                         start, stop = self.start, self.stop
                         if start is None:
0 commit comments