1
1
# -*- coding: utf-8 -*-
2
2
# cython: profile=False
3
3
4
+ cimport cython
4
5
from cython cimport Py_ssize_t
5
6
6
7
import numpy as np
@@ -10,23 +11,12 @@ cnp.import_array()
10
11
11
12
from util cimport is_string_object, get_nat
12
13
13
- from pandas._libs.khash cimport (khiter_t,
14
- kh_destroy_int64, kh_put_int64,
15
- kh_init_int64, kh_int64_t,
16
- kh_resize_int64, kh_get_int64)
17
-
18
14
from np_datetime cimport npy_datetimestruct, dt64_to_dtstruct
19
15
from frequencies cimport get_freq_code
20
16
from timezones cimport (is_utc, is_tzlocal,
21
17
maybe_get_tz, get_dst_info)
22
- from fields import build_field_sarray
23
- from conversion import tz_convert
24
18
from conversion cimport tz_convert_utc_to_tzlocal
25
- from ccalendar import MONTH_ALIASES, int_to_weekday
26
19
from ccalendar cimport get_days_in_month
27
- from timestamps import Timestamp
28
-
29
- from pandas._libs.properties import cache_readonly
30
20
31
21
# ----------------------------------------------------------------------
32
22
# Constants
@@ -41,13 +31,6 @@ cdef int RESO_MIN = 4
41
31
cdef int RESO_HR = 5
42
32
cdef int RESO_DAY = 6
43
33
44
# Unit sizes built up step by step, so each constant is an exact multiple of
# the one before it.
# NOTE(review): presumably expressed in nanoseconds (1000 per microsecond) to
# match the i8 datetime values they are compared against -- confirm.
_ONE_MICRO = <int64_t>1000L
_ONE_MILLI = <int64_t>(1000 * _ONE_MICRO)
_ONE_SECOND = <int64_t>(1000 * _ONE_MILLI)
_ONE_MINUTE = <int64_t>(_ONE_SECOND * 60)
_ONE_HOUR = <int64_t>(_ONE_MINUTE * 60)
_ONE_DAY = <int64_t>(_ONE_HOUR * 24)
50
-
51
34
# ----------------------------------------------------------------------
52
35
53
36
cpdef resolution(ndarray[int64_t] stamps, tz = None ):
@@ -331,31 +314,7 @@ class Resolution(object):
331
314
# ----------------------------------------------------------------------
332
315
# Frequency Inference
333
316
334
cdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
    """
    Return the distinct differences between consecutive elements of `arr`.

    Parameters
    ----------
    arr : ndarray[int64_t]

    Returns
    -------
    ndarray[int64_t, ndim=1]
        Each distinct value of ``arr[i + 1] - arr[i]``, sorted ascending.
        Empty when `arr` has fewer than two elements.
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        int64_t val
        khiter_t k
        kh_int64_t *table
        int ret = 0
        list uniques = []

    table = kh_init_int64()
    # The table is a C allocation; release it even if something below raises
    # (e.g. a MemoryError from the list append), otherwise it would leak.
    try:
        kh_resize_int64(table, 10)
        for i in range(n - 1):
            val = arr[i + 1] - arr[i]
            k = kh_get_int64(table, val)
            # A lookup result equal to n_buckets is treated as "not seen yet".
            if k == table.n_buckets:
                kh_put_int64(table, val, &ret)
                uniques.append(val)
    finally:
        kh_destroy_int64(table)

    result = np.array(uniques, dtype=np.int64)
    result.sort()
    return result
356
-
357
-
358
- cdef object month_position_check(fields, weekdays):
317
+ def month_position_check (fields , weekdays ):
359
318
cdef:
360
319
int32_t daysinmonth, y, m, d
361
320
bint calendar_end = True
@@ -397,247 +356,3 @@ cdef object month_position_check(fields, weekdays):
397
356
return ' bs'
398
357
else :
399
358
return None
400
-
401
-
402
cdef inline bint _is_multiple(int64_t us, int64_t mult):
    """Return True when `us` divides evenly by `mult`."""
    cdef int64_t remainder = us % mult
    return remainder == 0
404
-
405
-
406
cdef inline str _maybe_add_count(str base, int64_t count):
    """Return `base` prefixed with `count`, or `base` alone when count is 1."""
    if count == 1:
        return base
    return '{count}{base}'.format(count=count, base=base)
411
-
412
-
413
cdef class _FrequencyInferer(object):
    """
    Infer a frequency alias string from the spacing of a datetime-like index.

    The index's i8 values (shifted to local time when the index has a tz) are
    inspected through a set of cached derived properties; `get_freq` returns
    the inferred alias, or None when no rule matches.

    Not sure if I can avoid the state machine here
    """
    cdef public:
        object index
        object values
        bint warn
        bint is_monotonic
        dict _cache

    def __init__(self, index, warn=True):
        """
        Parameters
        ----------
        index : index-like; must have at least 3 elements
        warn : bool, default True
            Stored on the instance; not otherwise used in this class.

        Raises
        ------
        ValueError
            If `index` has fewer than 3 elements.
        """
        self.index = index
        self.values = np.asarray(index).view('i8')

        # This moves the values, which are implicitly in UTC, to the
        # timezone so they are in local time
        if hasattr(index, 'tz'):
            if index.tz is not None:
                self.values = tz_convert(self.values, 'UTC', index.tz)

        self.warn = warn

        if len(index) < 3:
            raise ValueError('Need at least 3 dates to infer frequency')

        self.is_monotonic = (self.index.is_monotonic_increasing or
                             self.index.is_monotonic_decreasing)

    @cache_readonly
    def deltas(self):
        """Sorted unique consecutive differences of the (localized) values."""
        return unique_deltas(self.values)

    @cache_readonly
    def deltas_asi8(self):
        """Sorted unique consecutive differences of the original asi8 values."""
        return unique_deltas(self.index.asi8)

    @cache_readonly
    def is_unique(self):
        """True when the localized values have a single distinct delta."""
        return len(self.deltas) == 1

    @cache_readonly
    def is_unique_asi8(self):
        """True when the original asi8 values have a single distinct delta."""
        return len(self.deltas_asi8) == 1

    def get_freq(self):
        """
        Return the inferred frequency alias, or None if none can be inferred.

        None is returned immediately when the index is not monotonic or
        contains duplicates.
        """
        if not self.is_monotonic or not self.index.is_unique:
            return None

        delta = self.deltas[0]
        if _is_multiple(delta, _ONE_DAY):
            return self._infer_daily_rule()

        # Business hourly, maybe. 17: one day / 65: one weekend
        if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
            return 'BH'
        # Possibly intraday frequency.  Here we use the
        # original .asi8 values as the modified values
        # will not work around DST transitions.  See #8772
        elif not self.is_unique_asi8:
            return None

        delta = self.deltas_asi8[0]
        # `//` rather than `/` below: divisibility has just been checked, so
        # the quotient is exact, and floor division keeps it an integer rather
        # than passing a float (where `/` is true division) to the int64_t
        # `count` argument.
        if _is_multiple(delta, _ONE_HOUR):
            # Hours
            return _maybe_add_count('H', delta // _ONE_HOUR)
        elif _is_multiple(delta, _ONE_MINUTE):
            # Minutes
            return _maybe_add_count('T', delta // _ONE_MINUTE)
        elif _is_multiple(delta, _ONE_SECOND):
            # Seconds
            return _maybe_add_count('S', delta // _ONE_SECOND)
        elif _is_multiple(delta, _ONE_MILLI):
            # Milliseconds
            return _maybe_add_count('L', delta // _ONE_MILLI)
        elif _is_multiple(delta, _ONE_MICRO):
            # Microseconds
            return _maybe_add_count('U', delta // _ONE_MICRO)
        else:
            # Nanoseconds
            return _maybe_add_count('N', delta)

    @cache_readonly
    def day_deltas(self):
        """Unique deltas expressed in units of `_ONE_DAY`."""
        # NOTE(review): left as `/` on purpose -- these deltas are not
        # guaranteed to be exact multiples, so `//` could change which inputs
        # match the integer patterns they are compared against.
        return [x / _ONE_DAY for x in self.deltas]

    @cache_readonly
    def hour_deltas(self):
        """Unique deltas expressed in units of `_ONE_HOUR`."""
        # NOTE(review): left as `/` for the same reason as `day_deltas`.
        return [x / _ONE_HOUR for x in self.deltas]

    @cache_readonly
    def fields(self):
        """Field structured array built from the (localized) values."""
        return build_field_sarray(self.values)

    @cache_readonly
    def rep_stamp(self):
        """Timestamp built from the first (localized) value."""
        return Timestamp(self.values[0])

    cdef object month_position_check(self):
        """Delegate to the module-level `month_position_check`."""
        return month_position_check(self.fields, self.index.dayofweek)

    @cache_readonly
    def mdiffs(self):
        """Unique consecutive differences of ``year * 12 + month``."""
        nmonths = self.fields['Y'] * 12 + self.fields['M']
        return unique_deltas(nmonths.astype('i8'))

    @cache_readonly
    def ydiffs(self):
        """Unique consecutive differences of the year field."""
        return unique_deltas(self.fields['Y'].astype('i8'))

    cdef _infer_daily_rule(self):
        """
        Try annual, quarterly, monthly, weekly/daily, business-daily and
        week-of-month rules in that order; return the first alias that
        matches, or None.
        """
        annual_rule = self._get_annual_rule()
        if annual_rule:
            nyears = self.ydiffs[0]
            month = MONTH_ALIASES[self.rep_stamp.month]
            alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month)
            return _maybe_add_count(alias, nyears)

        quarterly_rule = self._get_quarterly_rule()
        if quarterly_rule:
            # _get_quarterly_rule only succeeds when mdiffs[0] % 3 == 0,
            # so floor division is exact here.
            nquarters = self.mdiffs[0] // 3
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
            alias = '{prefix}-{month}'.format(prefix=quarterly_rule,
                                              month=month)
            return _maybe_add_count(alias, nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.mdiffs[0])

        if self.is_unique:
            # get_freq only calls this method when deltas[0] is a multiple
            # of _ONE_DAY, so the day count is exact.
            days = self.deltas[0] // _ONE_DAY
            if days % 7 == 0:
                # Weekly
                day = int_to_weekday[self.rep_stamp.weekday()]
                return _maybe_add_count('W-{day}'.format(day=day), days // 7)
            else:
                return _maybe_add_count('D', days)

        if self._is_business_daily():
            return 'B'

        wom_rule = self._get_wom_rule()
        if wom_rule:
            return wom_rule

    cdef _get_annual_rule(self):
        """Return the annual alias prefix, or None if not annual."""
        if len(self.ydiffs) > 1:
            return None

        # lazy import to prevent circularity
        # TODO: Avoid non-cython dependency
        from pandas.core.algorithms import unique

        if len(unique(self.fields['M'])) > 1:
            return None

        pos_check = self.month_position_check()
        return {'cs': 'AS', 'bs': 'BAS',
                'ce': 'A', 'be': 'BA'}.get(pos_check)

    cdef _get_quarterly_rule(self):
        """Return the quarterly alias prefix, or None if not quarterly."""
        if len(self.mdiffs) > 1:
            return None

        if not self.mdiffs[0] % 3 == 0:
            return None

        pos_check = self.month_position_check()
        return {'cs': 'QS', 'bs': 'BQS',
                'ce': 'Q', 'be': 'BQ'}.get(pos_check)

    cdef _get_monthly_rule(self):
        """Return the monthly alias, or None if not monthly."""
        if len(self.mdiffs) > 1:
            return None
        pos_check = self.month_position_check()
        return {'cs': 'MS', 'bs': 'BMS',
                'ce': 'M', 'be': 'BM'}.get(pos_check)

    cdef bint _is_business_daily(self):
        """Return True when the index steps match a Mon-Fri daily pattern."""
        # quick check: cannot be business daily
        if self.day_deltas != [1, 3]:
            return False

        # probably business daily, but need to confirm
        first_weekday = self.index[0].weekday()
        shifts = np.diff(self.index.asi8)
        shifts = np.floor_divide(shifts, _ONE_DAY)
        weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
        # Every step must either land on weekday 0 after a 3-day jump or
        # land on weekdays 1-4 after a 1-day jump.
        return np.all(((weekdays == 0) & (shifts == 3)) |
                      ((weekdays > 0) & (weekdays <= 4) & (shifts == 1)))

    cdef _get_wom_rule(self):
        """Return a 'WOM-<week><weekday>' alias, or None if none applies."""
        # wdiffs = unique(np.diff(self.index.week))
        # We also need -47, -49, -48 to catch index spanning year boundary
        # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
        #     return None

        # lazy import to prevent circularity
        # TODO: Avoid non-cython dependency
        from pandas.core.algorithms import unique

        weekdays = unique(self.index.weekday)
        if len(weekdays) > 1:
            return None

        week_of_months = unique((self.index.day - 1) // 7)
        # Only attempt to infer up to WOM-4. See #9425
        week_of_months = week_of_months[week_of_months < 4]
        if len(week_of_months) != 1:
            return None

        # get which week
        week = week_of_months[0] + 1
        wd = int_to_weekday[weekdays[0]]

        return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)
630
-
631
-
632
cdef class _TimedeltaFrequencyInferer(_FrequencyInferer):
    """
    Variant of `_FrequencyInferer` whose daily rule considers only weekly
    ('W-<weekday>') and plain daily ('D') aliases.
    """

    cdef _infer_daily_rule(self):
        """
        Return a weekly or daily alias when there is a single distinct delta;
        otherwise fall through and return None.
        """
        if self.is_unique:
            # `//` keeps the count an integer for the int64_t `count`
            # argument of _maybe_add_count.
            # NOTE(review): assumes deltas[0] is an exact multiple of
            # _ONE_DAY, as checked by get_freq in the base class -- confirm
            # there are no other callers.
            days = self.deltas[0] // _ONE_DAY
            if days % 7 == 0:
                # Weekly
                wd = int_to_weekday[self.rep_stamp.weekday()]
                alias = 'W-{weekday}'.format(weekday=wd)
                return _maybe_add_count(alias, days // 7)
            else:
                return _maybe_add_count('D', days)
0 commit comments