3
3
# don't introduce a pandas/pandas.compat import
4
4
# or we get a bootstrapping problem
5
5
from StringIO import StringIO
6
+ import numpy as np
7
+
8
+ _int64_max = np .iinfo (np .int64 ).max
6
9
7
10
header = """
8
11
cimport numpy as np
@@ -680,7 +683,7 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
680
683
for i in range(len(counts)):
681
684
for j in range(K):
682
685
if nobs[i, j] == 0:
683
- out[i, j] = nan
686
+ out[i, j] = %(nan_val)s
684
687
else:
685
688
out[i, j] = resx[i, j]
686
689
"""
@@ -726,7 +729,7 @@ def group_last_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
726
729
for i in range(ngroups):
727
730
for j in range(K):
728
731
if nobs[i, j] == 0:
729
- out[i, j] = nan
732
+ out[i, j] = %(nan_val)s
730
733
else:
731
734
out[i, j] = resx[i, j]
732
735
"""
@@ -773,7 +776,7 @@ def group_nth_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
773
776
for i in range(ngroups):
774
777
for j in range(K):
775
778
if nobs[i, j] == 0:
776
- out[i, j] = nan
779
+ out[i, j] = %(nan_val)s
777
780
else:
778
781
out[i, j] = resx[i, j]
779
782
"""
@@ -819,7 +822,7 @@ def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
819
822
for i in range(len(counts)):
820
823
for j in range(K):
821
824
if nobs[i, j] == 0:
822
- out[i, j] = nan
825
+ out[i, j] = %(nan_val)s
823
826
else:
824
827
out[i, j] = resx[i, j]
825
828
"""
@@ -1278,7 +1281,7 @@ def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1278
1281
nobs = np.zeros_like(out)
1279
1282
1280
1283
minx = np.empty_like(out)
1281
- minx.fill(np.inf )
1284
+ minx.fill(%(inf_val)s )
1282
1285
1283
1286
if bins[len(bins) - 1] == len(values):
1284
1287
ngroups = len(bins)
@@ -1319,7 +1322,7 @@ def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1319
1322
for i in range(ngroups):
1320
1323
for j in range(K):
1321
1324
if nobs[i, j] == 0:
1322
- out[i, j] = nan
1325
+ out[i, j] = %(nan_val)s
1323
1326
else:
1324
1327
out[i, j] = minx[i, j]
1325
1328
"""
@@ -1344,7 +1347,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1344
1347
nobs = np.zeros_like(out)
1345
1348
1346
1349
maxx = np.empty_like(out)
1347
- maxx.fill(-np.inf )
1350
+ maxx.fill(-%(inf_val)s )
1348
1351
1349
1352
N, K = (<object> values).shape
1350
1353
@@ -1381,7 +1384,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1381
1384
for i in range(len(counts)):
1382
1385
for j in range(K):
1383
1386
if nobs[i, j] == 0:
1384
- out[i, j] = nan
1387
+ out[i, j] = %(nan_val)s
1385
1388
else:
1386
1389
out[i, j] = maxx[i, j]
1387
1390
"""
@@ -1402,7 +1405,7 @@ def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1402
1405
1403
1406
nobs = np.zeros_like(out)
1404
1407
maxx = np.empty_like(out)
1405
- maxx.fill(-np.inf )
1408
+ maxx.fill(-%(inf_val)s )
1406
1409
1407
1410
if bins[len(bins) - 1] == len(values):
1408
1411
ngroups = len(bins)
@@ -1443,7 +1446,7 @@ def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1443
1446
for i in range(ngroups):
1444
1447
for j in range(K):
1445
1448
if nobs[i, j] == 0:
1446
- out[i, j] = nan
1449
+ out[i, j] = %(nan_val)s
1447
1450
else:
1448
1451
out[i, j] = maxx[i, j]
1449
1452
"""
@@ -1469,7 +1472,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1469
1472
nobs = np.zeros_like(out)
1470
1473
1471
1474
minx = np.empty_like(out)
1472
- minx.fill(np.inf )
1475
+ minx.fill(%(inf_val)s )
1473
1476
1474
1477
N, K = (<object> values).shape
1475
1478
@@ -1506,7 +1509,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1506
1509
for i in range(len(counts)):
1507
1510
for j in range(K):
1508
1511
if nobs[i, j] == 0:
1509
- out[i, j] = nan
1512
+ out[i, j] = %(nan_val)s
1510
1513
else:
1511
1514
out[i, j] = minx[i, j]
1512
1515
"""
@@ -2286,6 +2289,70 @@ def generate_put_template(template, use_ints=True, use_floats=True,
2286
2289
output .write (func )
2287
2290
return output .getvalue ()
2288
2291
2292
def generate_put_min_max_template(template, use_ints=True, use_floats=True,
                                  use_objects=False, use_datelikes=False):
    """
    Render ``template`` once for every selected dtype family.

    Parameters
    ----------
    template : str
        %-style template; it may reference the mapping keys ``name``,
        ``dest_type2``, ``nan_val`` and ``inf_val``.
    use_ints : bool, default True
        Include the int64 specialisation.
    use_floats : bool, default True
        Include the float64 and float32 specialisations.
    use_objects : bool, default False
        Include the object specialisation.
    use_datelikes : bool, default False
        Include the date-like (int64 backed) specialisation.

    Returns
    -------
    str
        The rendered text for each selected specialisation, concatenated
        in the order floats, ints, objects, date-likes.
    """
    # Each row is (name, dest_type2, nan_val, inf_val).  ``inf_val`` is
    # substituted textually, so the integer ``_int64_max`` ends up as its
    # decimal digits in the generated source.
    floats_list = [
        ('float64', 'float64_t', 'nan', 'np.inf'),
        ('float32', 'float32_t', 'nan', 'np.inf'),
    ]
    ints_list = [
        ('int64', 'int64_t', 'iNaT', _int64_max),
    ]
    date_like_list = [
        ('int64', 'int64_t', 'iNaT', _int64_max),
    ]
    object_list = [('object', 'object', 'nan', 'np.inf')]

    families = (
        (use_floats, floats_list),
        (use_ints, ints_list),
        (use_objects, object_list),
        (use_datelikes, date_like_list),
    )

    function_list = []
    for enabled, specs in families:
        if not enabled:
            continue
        # The date-like row is identical to the int64 row; skipping rows
        # that are already queued keeps the same function from being
        # emitted twice when both flags are set.
        function_list.extend([spec for spec in specs
                              if spec not in function_list])

    output = StringIO()
    for name, dest_type, nan_val, inf_val in function_list:
        output.write(template % {'name': name,
                                 'dest_type2': dest_type,
                                 'nan_val': nan_val,
                                 'inf_val': inf_val})
    return output.getvalue()
2323
+
2324
def generate_put_selection_template(template, use_ints=True, use_floats=True,
                                    use_objects=False, use_datelikes=False):
    """
    Render ``template`` once for every selected dtype family.

    Parameters
    ----------
    template : str
        %-style template; it may reference the mapping keys ``name``,
        ``c_type``, ``dest_type2`` and ``nan_val``.
    use_ints : bool, default True
        Include the int64 specialisation.
    use_floats : bool, default True
        Include the float64 and float32 specialisations.
    use_objects : bool, default False
        Include the object specialisation.
    use_datelikes : bool, default False
        Include the date-like (int64 backed) specialisation.

    Returns
    -------
    str
        The rendered text for each selected specialisation, concatenated
        in the order floats, ints, objects, date-likes.
    """
    # Each row is (name, c_type, dest_type2, nan_val).
    floats_list = [
        ('float64', 'float64_t', 'float64_t', 'nan'),
        ('float32', 'float32_t', 'float32_t', 'nan'),
    ]
    ints_list = [
        ('int64', 'int64_t', 'int64_t', 'iNaT'),
    ]
    date_like_list = [
        ('int64', 'int64_t', 'int64_t', 'iNaT'),
    ]
    object_list = [('object', 'object', 'object', 'nan')]

    families = (
        (use_floats, floats_list),
        (use_ints, ints_list),
        (use_objects, object_list),
        (use_datelikes, date_like_list),
    )

    function_list = []
    for enabled, specs in families:
        if not enabled:
            continue
        # The date-like row is identical to the int64 row; skipping rows
        # that are already queued keeps the same function from being
        # emitted twice when both flags are set.
        function_list.extend([spec for spec in specs
                              if spec not in function_list])

    output = StringIO()
    for name, c_type, dest_type, nan_val in function_list:
        output.write(template % {'name': name,
                                 'c_type': c_type,
                                 'dest_type2': dest_type,
                                 'nan_val': nan_val})
    return output.getvalue()
2355
+
2289
2356
def generate_take_template (template , exclude = None ):
2290
2357
# name, dest, ctypein, ctypeout, preval, postval, cancopy
2291
2358
function_list = [
@@ -2347,24 +2414,27 @@ def generate_from_template(template, exclude=None):
2347
2414
return output .getvalue ()
2348
2415
2349
2416
put_2d = [diff_2d_template ]
2350
- groupbys = [group_last_template ,
2351
- group_last_bin_template ,
2352
- group_nth_template ,
2353
- group_nth_bin_template ,
2354
- group_add_template ,
2417
+
2418
+ groupbys = [group_add_template ,
2355
2419
group_add_bin_template ,
2356
2420
group_prod_template ,
2357
2421
group_prod_bin_template ,
2358
2422
group_var_template ,
2359
2423
group_var_bin_template ,
2360
2424
group_mean_template ,
2361
2425
group_mean_bin_template ,
2362
- group_min_template ,
2363
- group_min_bin_template ,
2364
- group_max_template ,
2365
- group_max_bin_template ,
2366
2426
group_ohlc_template ]
2367
2427
2428
+ groupby_selection = [group_last_template ,
2429
+ group_last_bin_template ,
2430
+ group_nth_template ,
2431
+ group_nth_bin_template ]
2432
+
2433
+ groupby_min_max = [group_min_template ,
2434
+ group_min_bin_template ,
2435
+ group_max_template ,
2436
+ group_max_bin_template ]
2437
+
2368
2438
groupby_count = [group_count_template , group_count_bin_template ]
2369
2439
2370
2440
templates_1d = [map_indices_template ,
@@ -2407,9 +2477,18 @@ def generate_take_cython_file(path='generated.pyx'):
2407
2477
for template in groupbys :
2408
2478
print (generate_put_template (template , use_ints = False ), file = f )
2409
2479
2480
+ for template in groupby_selection :
2481
+ print (generate_put_selection_template (template , use_ints = True ),
2482
+ file = f )
2483
+
2484
+ for template in groupby_min_max :
2485
+ print (generate_put_min_max_template (template , use_ints = True ),
2486
+ file = f )
2487
+
2410
2488
for template in groupby_count :
2411
- print (generate_put_template (template , use_ints = False ,
2412
- use_datelikes = True , use_objects = True ),
2489
+ print (generate_put_selection_template (template , use_ints = True ,
2490
+ use_datelikes = True ,
2491
+ use_objects = True ),
2413
2492
file = f )
2414
2493
2415
2494
# for template in templates_1d_datetime:
0 commit comments