4
4
.. ipython :: python
5
5
:suppress:
6
6
7
- import datetime
8
7
import numpy as np
9
8
import pandas as pd
10
- from pandas import offsets
9
+
11
10
np.random.seed(123456 )
12
- randn = np.random.randn
13
- randint = np.random.randint
14
11
np.set_printoptions(precision = 4 , suppress = True )
15
- pd.options.display.max_rows= 15
16
- import dateutil
17
- import pytz
18
- from dateutil.relativedelta import relativedelta
12
+ pd.options.display.max_rows = 15
19
13
20
14
********************************
21
15
Time Series / Date functionality
@@ -32,7 +26,10 @@ Parsing time series information from various sources and formats
32
26
33
27
.. ipython :: python
34
28
35
- dti = pd.to_datetime([' 1/1/2018' , np.datetime64(' 2018-01-01' ), datetime.datetime(2018 , 1 , 1 )])
29
+ import datetime
30
+
31
+ dti = pd.to_datetime([' 1/1/2018' , np.datetime64(' 2018-01-01' ),
32
+ datetime.datetime(2018 , 1 , 1 )])
36
33
dti
37
34
38
35
Generate sequences of fixed-frequency dates and time spans
@@ -165,7 +162,9 @@ and :class:`PeriodIndex` respectively.
165
162
166
163
.. ipython :: python
167
164
168
- dates = [pd.Timestamp(' 2012-05-01' ), pd.Timestamp(' 2012-05-02' ), pd.Timestamp(' 2012-05-03' )]
165
+ dates = [pd.Timestamp(' 2012-05-01' ),
166
+ pd.Timestamp(' 2012-05-02' ),
167
+ pd.Timestamp(' 2012-05-03' )]
169
168
ts = pd.Series(np.random.randn(3 ), dates)
170
169
171
170
type (ts.index)
@@ -329,7 +328,7 @@ which can be specified. These are computed from the starting point specified by
329
328
1349979305 , 1350065705 ], unit = ' s' )
330
329
331
330
pd.to_datetime([1349720105100 , 1349720105200 , 1349720105300 ,
332
- 1349720105400 , 1349720105500 ], unit = ' ms' )
331
+ 1349720105400 , 1349720105500 ], unit = ' ms' )
333
332
334
333
.. note ::
335
334
@@ -402,7 +401,9 @@ To generate an index with timestamps, you can use either the ``DatetimeIndex`` o
402
401
403
402
.. ipython :: python
404
403
405
- dates = [datetime.datetime(2012 , 5 , 1 ), datetime.datetime(2012 , 5 , 2 ), datetime.datetime(2012 , 5 , 3 )]
404
+ dates = [datetime.datetime(2012 , 5 , 1 ),
405
+ datetime.datetime(2012 , 5 , 2 ),
406
+ datetime.datetime(2012 , 5 , 3 )]
406
407
407
408
# Note the frequency information
408
409
index = pd.DatetimeIndex(dates)
@@ -585,9 +586,8 @@ would include matching times on an included date:
585
586
586
587
.. ipython :: python
587
588
588
- dft = pd.DataFrame(randn(100000 ,1 ),
589
- columns = [' A' ],
590
- index = pd.date_range(' 20130101' ,periods = 100000 ,freq = ' T' ))
589
+ dft = pd.DataFrame(np.random.randn(100000 , 1 ), columns = [' A' ],
590
+ index = pd.date_range(' 20130101' , periods = 100000 , freq = ' T' ))
591
591
dft
592
592
dft[' 2013' ]
593
593
@@ -624,10 +624,9 @@ We are stopping on the included end-point as it is part of the index:
624
624
625
625
dft2 = pd.DataFrame(np.random.randn(20 , 1 ),
626
626
columns = [' A' ],
627
- index = pd.MultiIndex.from_product([pd.date_range(' 20130101' ,
628
- periods = 10 ,
629
- freq = ' 12H' ),
630
- [' a' , ' b' ]]))
627
+ index = pd.MultiIndex.from_product(
628
+ [pd.date_range(' 20130101' , periods = 10 , freq = ' 12H' ),
629
+ [' a' , ' b' ]]))
631
630
dft2
632
631
dft2.loc[' 2013-01-05' ]
633
632
idx = pd.IndexSlice
@@ -683,7 +682,7 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra
683
682
.. ipython :: python
684
683
685
684
dft_minute = pd.DataFrame({' a' : [1 , 2 , 3 ], ' b' : [4 , 5 , 6 ]},
686
- index = series_minute.index)
685
+ index = series_minute.index)
687
686
dft_minute[' 2011-12-31 23' ]
688
687
689
688
@@ -695,18 +694,16 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra
695
694
696
695
.. ipython :: python
697
696
698
- dft_minute.loc[' 2011-12-31 23:59' ]
697
+ dft_minute.loc[' 2011-12-31 23:59' ]
699
698
700
699
Note also that ``DatetimeIndex`` resolution cannot be less precise than day.
701
700
702
701
.. ipython :: python
703
702
704
703
series_monthly = pd.Series([1 , 2 , 3 ],
705
- pd.DatetimeIndex([' 2011-12' ,
706
- ' 2012-01' ,
707
- ' 2012-02' ]))
704
+ pd.DatetimeIndex([' 2011-12' , ' 2012-01' , ' 2012-02' ]))
708
705
series_monthly.index.resolution
709
- series_monthly[' 2011-12' ] # returns Series
706
+ series_monthly[' 2011-12' ] # returns Series
710
707
711
708
712
709
Exact Indexing
@@ -718,13 +715,14 @@ These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and
718
715
719
716
.. ipython :: python
720
717
721
- dft[datetime.datetime(2013 , 1 , 1 ):datetime.datetime(2013 ,2 , 28 )]
718
+ dft[datetime.datetime(2013 , 1 , 1 ):datetime.datetime(2013 , 2 , 28 )]
722
719
723
720
With no defaults.
724
721
725
722
.. ipython :: python
726
723
727
- dft[datetime.datetime(2013 , 1 , 1 , 10 , 12 , 0 ):datetime.datetime(2013 , 2 , 28 , 10 , 12 , 0 )]
724
+ dft[datetime.datetime(2013 , 1 , 1 , 10 , 12 , 0 ):
725
+ datetime.datetime(2013 , 2 , 28 , 10 , 12 , 0 )]
728
726
729
727
730
728
Truncating & Fancy Indexing
@@ -1045,14 +1043,16 @@ As an interesting example, let's look at Egypt where a Friday-Saturday weekend i
1045
1043
1046
1044
.. ipython :: python
1047
1045
1048
- from pandas.tseries.offsets import CustomBusinessDay
1049
1046
weekmask_egypt = ' Sun Mon Tue Wed Thu'
1050
1047
1051
1048
# They also observe International Workers' Day so let's
1052
1049
# add that for a couple of years
1053
1050
1054
- holidays = [' 2012-05-01' , datetime.datetime(2013 , 5 , 1 ), np.datetime64(' 2014-05-01' )]
1055
- bday_egypt = CustomBusinessDay(holidays = holidays, weekmask = weekmask_egypt)
1051
+ holidays = [' 2012-05-01' ,
1052
+ datetime.datetime(2013 , 5 , 1 ),
1053
+ np.datetime64(' 2014-05-01' )]
1054
+ bday_egypt = pd.offsets.CustomBusinessDay(holidays = holidays,
1055
+ weekmask = weekmask_egypt)
1056
1056
dt = datetime.datetime(2013 , 4 , 30 )
1057
1057
dt + 2 * bday_egypt
1058
1058
@@ -1062,7 +1062,8 @@ Let's map to the weekday names:
1062
1062
1063
1063
dts = pd.date_range(dt, periods = 5 , freq = bday_egypt)
1064
1064
1065
- pd.Series(dts.weekday, dts).map(pd.Series(' Mon Tue Wed Thu Fri Sat Sun' .split()))
1065
+ pd.Series(dts.weekday, dts).map(
1066
+ pd.Series(' Mon Tue Wed Thu Fri Sat Sun' .split()))
1066
1067
1067
1068
Holiday calendars can be used to provide the list of holidays. See the
1068
1069
:ref:`holiday calendar<timeseries.holiday>` section for more information.
@@ -1071,7 +1072,7 @@ Holiday calendars can be used to provide the list of holidays. See the
1071
1072
1072
1073
from pandas.tseries.holiday import USFederalHolidayCalendar
1073
1074
1074
- bday_us = CustomBusinessDay(calendar = USFederalHolidayCalendar())
1075
+ bday_us = pd.offsets. CustomBusinessDay(calendar = USFederalHolidayCalendar())
1075
1076
1076
1077
# Friday before MLK Day
1077
1078
dt = datetime.datetime(2014 , 1 , 17 )
@@ -1084,15 +1085,15 @@ in the usual way.
1084
1085
1085
1086
.. ipython :: python
1086
1087
1087
- from pandas.tseries. offsets import CustomBusinessMonthBegin
1088
- bmth_us = pd.offsets.CustomBusinessMonthBegin( calendar = USFederalHolidayCalendar())
1088
+ bmth_us = pd. offsets. CustomBusinessMonthBegin(
1089
+ calendar = USFederalHolidayCalendar())
1089
1090
1090
1091
# Skip new years
1091
1092
dt = datetime.datetime(2013 , 12 , 17 )
1092
1093
dt + bmth_us
1093
1094
1094
1095
# Define date index with custom offset
1095
- pd.DatetimeIndex(start = ' 20100101' ,end = ' 20120101' ,freq = bmth_us)
1096
+ pd.DatetimeIndex(start = ' 20100101' , end = ' 20120101' , freq = bmth_us)
1096
1097
1097
1098
.. note ::
1098
1099
@@ -1231,7 +1232,8 @@ You can use keyword arguments supported by either ``BusinessHour`` and ``CustomB
1231
1232
1232
1233
.. ipython :: python
1233
1234
1234
- bhour_mon = pd.offsets.CustomBusinessHour(start = ' 10:00' , weekmask = ' Tue Wed Thu Fri' )
1235
+ bhour_mon = pd.offsets.CustomBusinessHour(start = ' 10:00' ,
1236
+ weekmask = ' Tue Wed Thu Fri' )
1235
1237
1236
1238
# Monday is skipped because it's a holiday, business hour starts from 10:00
1237
1239
dt + bhour_mon * 2
@@ -1429,11 +1431,14 @@ An example of how holidays and holiday calendars are defined:
1429
1431
USMemorialDay,
1430
1432
Holiday(' July 4th' , month = 7 , day = 4 , observance = nearest_workday),
1431
1433
Holiday(' Columbus Day' , month = 10 , day = 1 ,
1432
- offset = pd.DateOffset(weekday = MO(2 ))), # same as 2*Week(weekday=2)
1433
- ]
1434
+ offset = pd.DateOffset(weekday = MO(2 )))]
1435
+
1434
1436
cal = ExampleCalendar()
1435
1437
cal.holidays(datetime.datetime(2012 , 1 , 1 ), datetime.datetime(2012 , 12 , 31 ))
1436
1438
1439
+ .. hint::
1440
+ **weekday=MO(2)** is the same as **2 * Week(weekday=2)**
1441
+
1437
1442
Using this calendar, creating an index or doing offset arithmetic skips weekends
1438
1443
and holidays (i.e., Memorial Day/July 4th). For example, the below defines
1439
1444
a custom business day offset using the ``ExampleCalendar``. Like any other offset,
@@ -1442,10 +1447,9 @@ or ``Timestamp`` objects.
1442
1447
1443
1448
.. ipython :: python
1444
1449
1445
- from pandas.tseries.offsets import CDay
1446
1450
pd.DatetimeIndex(start = ' 7/1/2012' , end = ' 7/10/2012' ,
1447
- freq = CDay(calendar = cal)).to_pydatetime()
1448
- offset = CustomBusinessDay(calendar = cal)
1451
+ freq = pd.offsets. CDay(calendar = cal)).to_pydatetime()
1452
+ offset = pd.offsets. CustomBusinessDay(calendar = cal)
1449
1453
datetime.datetime(2012 , 5 , 25 ) + offset
1450
1454
datetime.datetime(2012 , 7 , 3 ) + offset
1451
1455
datetime.datetime(2012 , 7 , 3 ) + 2 * offset
@@ -1532,7 +1536,7 @@ calls ``reindex``.
1532
1536
.. ipython :: python
1533
1537
1534
1538
dr = pd.date_range(' 1/1/2010' , periods = 3 , freq = 3 * pd.offsets.BDay())
1535
- ts = pd.Series(randn(3 ), index = dr)
1539
+ ts = pd.Series(np.random. randn(3 ), index = dr)
1536
1540
ts
1537
1541
ts.asfreq(pd.offsets.BDay())
1538
1542
@@ -1626,7 +1630,7 @@ labels.
1626
1630
1627
1631
.. ipython :: python
1628
1632
1629
- ts.resample(' 5Min' ).mean() # by default label='left'
1633
+ ts.resample(' 5Min' ).mean() # by default label='left'
1630
1634
1631
1635
ts.resample(' 5Min' , label = ' left' ).mean()
1632
1636
@@ -1739,7 +1743,7 @@ We can select a specific column or columns using standard getitem.
1739
1743
1740
1744
r[' A' ].mean()
1741
1745
1742
- r[[' A' ,' B' ]].mean()
1746
+ r[[' A' , ' B' ]].mean()
1743
1747
1744
1748
You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``:
1745
1749
@@ -1760,21 +1764,21 @@ columns of a ``DataFrame``:
1760
1764
.. ipython :: python
1761
1765
:okexcept:
1762
1766
1763
- r.agg({' A' : np.sum,
1764
- ' B' : lambda x : np.std(x, ddof = 1 )})
1767
+ r.agg({' A' : np.sum,
1768
+ ' B' : lambda x : np.std(x, ddof = 1 )})
1765
1769
1766
1770
The function names can also be strings. In order for a string to be valid it
1767
1771
must be implemented on the resampled object:
1768
1772
1769
1773
.. ipython :: python
1770
1774
1771
- r.agg({' A' : ' sum' , ' B' : ' std' })
1775
+ r.agg({' A' : ' sum' , ' B' : ' std' })
1772
1776
1773
1777
Furthermore, you can also specify multiple aggregation functions for each column separately.
1774
1778
1775
1779
.. ipython :: python
1776
1780
1777
- r.agg({' A' : [' sum' ,' std' ], ' B' : [' mean' ,' std' ] })
1781
+ r.agg({' A' : [' sum' , ' std' ], ' B' : [' mean' , ' std' ]})
1778
1782
1779
1783
1780
1784
If a ``DataFrame`` does not have a datetimelike index, but instead you want
@@ -1786,9 +1790,9 @@ to resample based on datetimelike column in the frame, it can passed to the
1786
1790
df = pd.DataFrame({' date' : pd.date_range(' 2015-01-01' , freq = ' W' , periods = 5 ),
1787
1791
' a' : np.arange(5 )},
1788
1792
index = pd.MultiIndex.from_arrays([
1789
- [1 ,2 , 3 , 4 , 5 ],
1790
- pd.date_range(' 2015-01-01' , freq = ' W' , periods = 5 )],
1791
- names = [' v' ,' d' ]))
1793
+ [1 , 2 , 3 , 4 , 5 ],
1794
+ pd.date_range(' 2015-01-01' , freq = ' W' , periods = 5 )],
1795
+ names = [' v' , ' d' ]))
1792
1796
df
1793
1797
df.resample(' M' , on = ' date' ).sum()
1794
1798
@@ -1989,9 +1993,11 @@ Passing a string representing a lower frequency than ``PeriodIndex`` returns par
1989
1993
1990
1994
ps[' 2011' ]
1991
1995
1992
- dfp = pd.DataFrame(np.random.randn(600 ,1 ),
1996
+ dfp = pd.DataFrame(np.random.randn(600 , 1 ),
1993
1997
columns = [' A' ],
1994
- index = pd.period_range(' 2013-01-01 9:00' , periods = 600 , freq = ' T' ))
1998
+ index = pd.period_range(' 2013-01-01 9:00' ,
1999
+ periods = 600 ,
2000
+ freq = ' T' ))
1995
2001
dfp
1996
2002
dfp[' 2013-01-01 10H' ]
1997
2003
@@ -2180,6 +2186,8 @@ time zones by starting with ``dateutil/``.
2180
2186
2181
2187
.. ipython :: python
2182
2188
2189
+ import dateutil
2190
+
2183
2191
# pytz
2184
2192
rng_pytz = pd.date_range(' 3/6/2012 00:00' , periods = 10 , freq = ' D' ,
2185
2193
tz = ' Europe/London' )
@@ -2201,6 +2209,8 @@ which gives you more control over which time zone is used:
2201
2209
2202
2210
.. ipython :: python
2203
2211
2212
+ import pytz
2213
+
2204
2214
# pytz
2205
2215
tz_pytz = pytz.timezone(' Europe/London' )
2206
2216
rng_pytz = pd.date_range(' 3/6/2012 00:00' , periods = 10 , freq = ' D' ,
@@ -2299,7 +2309,8 @@ To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or
2299
2309
2300
2310
.. ipython :: python
2301
2311
2302
- didx = pd.DatetimeIndex(start = ' 2014-08-01 09:00' , freq = ' H' , periods = 10 , tz = ' US/Eastern' )
2312
+ didx = pd.DatetimeIndex(start = ' 2014-08-01 09:00' , freq = ' H' ,
2313
+ periods = 10 , tz = ' US/Eastern' )
2303
2314
didx
2304
2315
didx.tz_localize(None )
2305
2316
didx.tz_convert(None )
@@ -2352,7 +2363,8 @@ constructor as well as ``tz_localize``.
2352
2363
rng_hourly.tz_localize(' US/Eastern' , ambiguous = rng_hourly_dst).tolist()
2353
2364
rng_hourly.tz_localize(' US/Eastern' , ambiguous = ' NaT' ).tolist()
2354
2365
2355
- didx = pd.DatetimeIndex(start = ' 2014-08-01 09:00' , freq = ' H' , periods = 10 , tz = ' US/Eastern' )
2366
+ didx = pd.DatetimeIndex(start = ' 2014-08-01 09:00' , freq = ' H' ,
2367
+ periods = 10 , tz = ' US/Eastern' )
2356
2368
didx
2357
2369
didx.tz_localize(None )
2358
2370
didx.tz_convert(None )
@@ -2403,14 +2415,14 @@ TZ Aware Dtypes
2403
2415
2404
2416
.. ipython :: python
2405
2417
2406
- s_naive = pd.Series(pd.date_range(' 20130101' ,periods = 3 ))
2418
+ s_naive = pd.Series(pd.date_range(' 20130101' , periods = 3 ))
2407
2419
s_naive
2408
2420
2409
2421
``Series/DatetimeIndex`` with timezone **aware** values are represented with a dtype of ``datetime64[ns, tz]``.
2410
2422
2411
2423
.. ipython :: python
2412
2424
2413
- s_aware = pd.Series(pd.date_range(' 20130101' ,periods = 3 ,tz = ' US/Eastern' ))
2425
+ s_aware = pd.Series(pd.date_range(' 20130101' , periods = 3 , tz = ' US/Eastern' ))
2414
2426
s_aware
2415
2427
2416
2428
Both of these ``Series`` can be manipulated via the ``.dt`` accessor, see :ref:`here <basics.dt_accessors>`.
0 commit comments