From fcde169a02af35e0b46d588bbecf85715d12857d Mon Sep 17 00:00:00 2001
From: TimMi
Date: Thu, 1 Nov 2012 16:14:23 +0100
Subject: [PATCH 1/5] pivot hourly using a new result shape

---
 pandas/tseries/pivot.py           | 207 ++++++++++++++++++++++++++++++
 pandas/tseries/tests/test_util.py |  54 ++++++++
 pandas/tseries/util.py            |   1 +
 3 files changed, 262 insertions(+)
 create mode 100644 pandas/tseries/pivot.py

diff --git a/pandas/tseries/pivot.py b/pandas/tseries/pivot.py
new file mode 100644
index 0000000000000..50fd60b2a03ff
--- /dev/null
+++ b/pandas/tseries/pivot.py
@@ -0,0 +1,207 @@
+import numpy as np
+
+from pandas.core.frame import DataFrame
+import pandas.core.nanops as nanops
+from pandas.tseries.util import isleapyear
+from pandas.tseries.index import date_range
+
+def pivot_annual_h(series, freq=None, dt_index=False):
+    """
+    Group a series by years, taking leap years into account.
+
+    The output has as many rows as distinct years in the original series,
+    and as many columns as the length of a leap year in the units corresponding
+    to the original frequency (366 for daily frequency, 366*24 for hourly...).
+    The first column of the output corresponds to Jan. 1st, 00:00:00,
+    while the last column corresponds to Dec. 31st, 23:59:59.
+    Entries corresponding to Feb. 29th are masked for non-leap years.
+
+    For example, if the initial series has a daily frequency, the 59th column
+    of the output always corresponds to Feb. 28th, the 61st column to Mar. 1st,
+    and the 60th column is masked for non-leap years.
+    With an hourly initial frequency, the (59*24)th column of the output always
+    corresponds to Feb. 28th 23:00, the (61*24)th column to Mar. 1st, 00:00, and
+    the 24 columns between (59*24) and (61*24) are masked.
+
+    If the original frequency is less than daily, the output is equivalent to
+    ``series.convert('A', func=None)``.
+
+    Parameters
+    ----------
+    series : TimeSeries
+    freq : string or None, default None
+
+    Returns
+    -------
+    annual : DataFrame
+
+
+    """
+    #TODO: test like original pandas and the position of first and last value in arrays
+    #TODO: reduce number of hardcoded values scattered all around.
+    index = series.index
+    year = index.year
+    years = nanops.unique1d(year)
+
+    if freq is not None:
+        freq = freq.upper()
+    else:
+        freq = series.index.freq
+
+    if freq == 'H':
+
+        ##basics
+
+        #integer value of sum of all hours in a leap year
+        total_hoy_leap = (year_length(series.index.freqstr))
+
+        #list of all hours in a leap year
+        hoy_leap_list = range(1, (total_hoy_leap + 1 ))
+
+
+
+        values = np.empty((total_hoy_leap, len(years)), dtype=series.dtype)
+        values.fill(np.nan)
+
+        dummy_df = DataFrame(values, index=hoy_leap_list,
+                             columns=years)
+
+        ##get offset for leap hours
+
+        #see:
+        #http://stackoverflow.com/questions/2004364/increment-numpy-array-with-repeated-indices
+        #1994-02-28 23:00:00 -> index 1415
+        ind_z = np.array(range(0, 8760))
+        ind_i = np.array(range(1416,8760 ))
+
+        ind_t = ind_z.copy()
+        ind_t[ind_i]+=24
+
+        #TODO: beautify variable names
+        for year in years:
+
+            # select data for the respective year
+            ser_sel = series[ series.index.year == year]
+            info = (ser_sel).values
+
+
+
+            if isleapyear(year):
+                dummy_df[year] = info
+            else:
+                data = np.empty((total_hoy_leap), dtype=series.dtype)
+                data.fill(np.nan)
+
+                ser_sel = series[ series.index.year == year]
+                info = (ser_sel).values
+
+                data.put(ind_t, (series[ series.index.year == year]).values)
+
+                dummy_df[year] = data
+
+        res_df = dummy_df
+
+        #assign a datetime index, CAUTION: the year is definatly wrong!
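+        #(default_rng() below builds an hourly index for the dummy leap year
+        # 2012, so the month/day/hour positions line up with the leap-year
+        # layout while the year itself is only a placeholder)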
+        if dt_index:
+            rng = default_rng()
+            res_df = DataFrame(res_df.values, index=rng,
+                               columns=res_df.columns)
+
+        return res_df
+
+#TDOO: use pivot_annual for D & M and minute in the same fashion
+    if freq == 'D':
+        raise NotImplementedError(freq), "use pandas.tseries.util.pivot_annual"
+
+    if freq == 'M':
+        raise NotImplementedError(freq), "use pandas.tseries.util.pivot_annual"
+
+    else:
+        raise NotImplementedError(freq)
+
+
+    return res_df
+
+
+### timeseries pivoting helper
+
+def last_col2front(df, col_no=1):
+    """shifts the last column of a data frame to the front
+
+    increase col_no to shift more cols
+    """
+    cols = cols = df.columns.tolist()
+    #increase index value to 2+ if more columns are to be shifted
+    cols = cols[-col_no:] + cols[:-col_no]
+    df = df[cols]
+
+    return df
+
+
+def extended_info(df, time_cols=True, aggreg=True, aggreg_func=None,
+                  datetime_index=False):
+    """add extended information to a timeseries pivot
+    """
+
+    df_extended = df.copy()
+    #perform the following only on the data columns
+    cols = df_extended.columns
+    #TODO: add standard aggregation
+    #TODO: make function be set by argument
+    #TODO: is there no a SM describe function?
+    #TODO: Maybe use http://pandas.pydata.org/pandas-docs/dev/basics.html#summarizing-data-describe
+    if aggreg:
+
+        df_extended['mean'] = df_extended[cols].mean(1)
+        df_extended['sum'] = df_extended[cols].sum(1)
+        df_extended['min'] = df_extended[cols].min(1)
+        df_extended['max'] = df_extended[cols].max(1)
+        df_extended['max'] = df_extended[cols].std(1)
+
+    #add some metadata
+    #TODO: add function to make index a datetime with the argument above using the rng below
+    #TODO: convert the range to lower frequencies and reuse the function.
+    rng = default_rng()
+    df_extended['doy'] = rng.dayofyear
+#    df_extended = last_col2front(df_extended)
+    df_extended['month'] = rng.month
+#    df_extended = last_col2front(df_extended)
+    df_extended['day'] = rng.day
+#    df_extended = last_col2front(df_extended)
+    df_extended['hour'] = rng.hour + 1
+    df_extended = last_col2front(df_extended, col_no=4)
+
+    return df_extended
+
+###Timeseries convenience / helper functions
+
+
+def year_length(freq, leap=True):
+    """helper function for year length at different frequencies.
+    to be expanded
+    """
+
+    daysofyear_leap = 366
+    daysofyear_nonleap = 365
+
+    if freq == 'H':
+        if leap:
+            length = 24 * daysofyear_leap
+        else:
+            length = 24 * daysofyear_nonleap
+
+    return length
+
+def default_rng(freq='H', leap=True):
+    """create default ranges
+    """
+
+    if leap:
+        total_hoy_leap = (year_length(freq='H'))
+        rng = date_range('1/1/2012', periods=total_hoy_leap, freq='H')
+
+    else:
+        total_hoy_nonleap = (year_length(freq='H'))
+        rng = date_range('1/1/2011', periods=total_hoy_nonleap, freq='H')
+
+    return rng
\ No newline at end of file
diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py
index 02a98858ed808..1a445343390d5 100644
--- a/pandas/tseries/tests/test_util.py
+++ b/pandas/tseries/tests/test_util.py
@@ -7,11 +7,64 @@
 import pandas.util.testing as tm
 
 from pandas.tseries.util import pivot_annual, isleapyear
+from pandas.tseries import pivot
 
 
 class TestPivotAnnual(unittest.TestCase):
     """
     New pandas of scikits.timeseries pivot_annual
     """
+    def test_hourly(self):
+        rng_hourly = date_range('1/1/1994', periods=(18* 8760 + 4*24), freq='H')
+        data_hourly = np.random.randint(100, high=350, size=rng_hourly.size)
+        data_hourly = data_hourly.astype('float64')
+        ts_hourly = Series(data_hourly, index=rng_hourly)
+
+        annual = pivot.pivot_annual_h(ts_hourly, dt_index=True)
+
+        ### general
+        ##test first column: if first value and data are the same as first value of timeseries
+        #date
+        def get_mdh(DatetimeIndex, index):
+            #(m, d, h)
+            mdh_tuple = (DatetimeIndex.month[index], DatetimeIndex.day[index],
+                         DatetimeIndex.hour[index])
+            return mdh_tuple
+# ts_hourly.index.month[1], ts_hourly.index.month[1], ts_hourly.index.month[1]
+
+        assert get_mdh(ts_hourly.index, 1) == get_mdh(annual.index, 1)
+        #are the last dates of ts identical with the dates last row in the last column?
+        assert get_mdh(ts_hourly.index[-1]) == get_mdh(annual.index,
+                                                       (annual.index.size -1))
+        #first values of the ts identical with the first col and last row of the df?
+        assert ts_hourly[0] == annual.ix[1].values[0]
+        #last values of the ts identical with the last col and last row of the df?
+        assert ts_hourly[-1] == annual.ix[annual.index.size].values[-1]
+        ### index
+        ##test if index has the right length
+        assert annual.index[-1] == 8784
+        ##test last column: if first value and data are the same as first value of timeseries
+        ### leap
+        ##test leap offset
+        #leap year: 1996 - are the values of the ts and the
+        ser96_leap = ts_hourly[(ts_hourly.index.year == 1996) &
+                               (ts_hourly.index.month == 2) &
+                               (ts_hourly.index.day == 29)
+                               ]
+
+        df96 = annual[1996]
+        df96_leap = df96[(df96.index.month == 2) & (df96.index.day == 29)]
+        tm.assert_series_equal(ser96_leap, df96_leap)
+        #non-leap year: 1994 - are all values NaN for day 29.02?
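+        #(Feb. 29th spans 24 hourly rows in the leap-shaped index, so a
+        # length-24 array of NaN is the expected slice for a non-leap year)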
+        nan_arr = np.empty(24)
+        nan_arr.fill(np.nan)
+        df94 = annual[1994]
+        df94_noleap = df94[(df94.index.month == 2) & (df94.index.day == 29)]
+        np.testing.assert_equal(df94_noleap.values, nan_arr)
+        ### extended functionaliy
+
+
+
     def test_daily(self):
         rng = date_range('1/1/2000', '12/31/2004', freq='D')
         ts = Series(np.random.randn(len(rng)), index=rng)
@@ -33,6 +86,7 @@ def test_daily(self):
         leaps.index = leaps.index.year
         tm.assert_series_equal(annual[day].dropna(), leaps)
 
+
     def test_weekly(self):
         pass
 
diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py
index 4b29771233c50..9dc51cbe1e175 100644
--- a/pandas/tseries/util.py
+++ b/pandas/tseries/util.py
@@ -2,6 +2,7 @@
 
 from pandas.core.frame import DataFrame
 import pandas.core.nanops as nanops
+from pandas.tseries.util import isleapyear
 
 def pivot_annual(series, freq=None):
     """

From 6a173d792f300cde6c2356b0e57ae4b71a9d8660 Mon Sep 17 00:00:00 2001
From: TimMi
Date: Thu, 1 Nov 2012 17:44:41 +0100
Subject: [PATCH 2/5] added all the test for the pivot hourly

now the functionality is there, if OK, we could improve the docstrings
---
 pandas/tseries/pivot.py           | 11 ++++++-----
 pandas/tseries/tests/test_util.py | 14 +++++++++++++-
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/pandas/tseries/pivot.py b/pandas/tseries/pivot.py
index 50fd60b2a03ff..20009cbae546d 100644
--- a/pandas/tseries/pivot.py
+++ b/pandas/tseries/pivot.py
@@ -139,7 +139,7 @@ def last_col2front(df, col_no=1):
 
 
 def extended_info(df, time_cols=True, aggreg=True, aggreg_func=None,
-                  datetime_index=False):
+                  ):
     """add extended information to a timeseries pivot
     """
 
@@ -156,18 +156,19 @@ def extended_info(df, time_cols=True, aggreg=True, aggreg_func=None,
         df_extended['sum'] = df_extended[cols].sum(1)
         df_extended['min'] = df_extended[cols].min(1)
         df_extended['max'] = df_extended[cols].max(1)
-        df_extended['max'] = df_extended[cols].std(1)
+        df_extended['std'] = df_extended[cols].std(1)
+        #TODO: how to add more functions in flexible way? check other pandas functions
+        if aggreg_func:
+            df_extended['aggregated'] = df_extended[cols].aggreg_func(1)
 
     #add some metadata
     #TODO: add function to make index a datetime with the argument above using the rng below
    #TODO: convert the range to lower frequencies and reuse the function.
     rng = default_rng()
     df_extended['doy'] = rng.dayofyear
-#    df_extended = last_col2front(df_extended)
     df_extended['month'] = rng.month
-#    df_extended = last_col2front(df_extended)
     df_extended['day'] = rng.day
-#    df_extended = last_col2front(df_extended)
+    #add 1 to have hours formatted in "natural" and not programming counting
     df_extended['hour'] = rng.hour + 1
     df_extended = last_col2front(df_extended, col_no=4)
 
diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py
index 1a445343390d5..64409bc6bb527 100644
--- a/pandas/tseries/tests/test_util.py
+++ b/pandas/tseries/tests/test_util.py
@@ -61,9 +61,21 @@ def get_mdh(DatetimeIndex, index):
         df94_noleap = df94[(df94.index.month == 2) & (df94.index.day == 29)]
         np.testing.assert_equal(df94_noleap.values, nan_arr)
         ### extended functionaliy
+        ext = pivot.extended_info(annual)
+        ## descriptive statistics
+        #mean
+        tm.assert_frame_equal(annual.mean(1), ext['mean'])
+        tm.assert_frame_equal(annual.sum(1), ext['sum'])
+        tm.assert_frame_equal(annual.min(1), ext['min'])
+        tm.assert_frame_equal(annual.min(1), ext['max'])
+        tm.assert_frame_equal(annual.std(1), ext['std'])
+        ## additional time columns for easier filtering
+        np.testing.assert_equal(ext['doy'].values, annual.index.dayofyear)
+        np.testing.assert_equal(ext['day'].values, annual.index.day)
+        #the hour is incremented by 1
+        np.testing.assert_equal(ext['hour'].values, (annual.index.hour +1))
 
 
-
     def test_daily(self):
         rng = date_range('1/1/2000', '12/31/2004', freq='D')

From 6814408b0837cf97bf48997ebae2f7676352b02e Mon Sep 17 00:00:00 2001
From: timmie
Date: Tue, 6 Nov 2012 00:35:24 +0100
Subject: [PATCH 3/5] corrected an error in the extended info for df,
 simplified the df tests and corrected indices in tests

---
 pandas/tseries/pivot.py           | 11 +++++-----
 pandas/tseries/tests/test_util.py | 36 +++++++++++++++++--------------
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/pandas/tseries/pivot.py b/pandas/tseries/pivot.py
index 20009cbae546d..9792ef1f60550 100644
--- a/pandas/tseries/pivot.py
+++ b/pandas/tseries/pivot.py
@@ -139,7 +139,7 @@ def last_col2front(df, col_no=1):
 
 
 def extended_info(df, time_cols=True, aggreg=True, aggreg_func=None,
-                  ):
+                  datetime_index=False):
     """add extended information to a timeseries pivot
     """
 
@@ -157,18 +157,17 @@ def extended_info(df, time_cols=True, aggreg=True, aggreg_func=None,
         df_extended['min'] = df_extended[cols].min(1)
         df_extended['max'] = df_extended[cols].max(1)
         df_extended['std'] = df_extended[cols].std(1)
-        #TODO: how to add more functions in flexible way? check other pandas functions
-        if aggreg_func:
-            df_extended['aggregated'] = df_extended[cols].aggreg_func(1)
 
     #add some metadata
     #TODO: add function to make index a datetime with the argument above using the rng below
     #TODO: convert the range to lower frequencies and reuse the function.
     rng = default_rng()
     df_extended['doy'] = rng.dayofyear
+#    df_extended = last_col2front(df_extended)
     df_extended['month'] = rng.month
+#    df_extended = last_col2front(df_extended)
     df_extended['day'] = rng.day
-    #add 1 to have hours formatted in "natural" and not programming counting
+#    df_extended = last_col2front(df_extended)
     df_extended['hour'] = rng.hour + 1
     df_extended = last_col2front(df_extended, col_no=4)
 
@@ -205,4 +204,4 @@ def default_rng(freq='H', leap=True):
         total_hoy_nonleap = (year_length(freq='H'))
         rng = date_range('1/1/2011', periods=total_hoy_nonleap, freq='H')
 
-    return rng
\ No newline at end of file
+    return rng
diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py
index 5294b97f32de3..2548714fe76ec 100644
--- a/pandas/tseries/tests/test_util.py
+++ b/pandas/tseries/tests/test_util.py
@@ -12,9 +12,10 @@
 from pandas.tseries.util import pivot_annual, isleapyear
 from pandas.tseries import pivot
 
-class TestPivotAnnual(unittest.TestCase):
+
+class TestPivotAnnualHourly(unittest.TestCase):
     """
-    New pandas of scikits.timeseries pivot_annual
+    New pandas of scikits.timeseries pivot_annual for hourly with a new shape
     """
     def test_hourly(self):
         rng_hourly = date_range('1/1/1994', periods=(18* 8760 + 4*24), freq='H')
         data_hourly = np.random.randint(100, high=350, size=rng_hourly.size)
         data_hourly = data_hourly.astype('float64')
         ts_hourly = Series(data_hourly, index=rng_hourly)
@@ -36,15 +37,14 @@ def get_mdh(DatetimeIndex, index):
 
         assert get_mdh(ts_hourly.index, 1) == get_mdh(annual.index, 1)
         #are the last dates of ts identical with the dates last row in the last column?
-        assert get_mdh(ts_hourly.index[-1]) == get_mdh(annual.index,
-                                                       (annual.index.size -1))
-        #first values of the ts identical with the first col and last row of the df?
-        assert ts_hourly[0] == annual.ix[1].values[0]
+        assert get_mdh(ts_hourly.index, -1) == get_mdh(annual.index, (annual.index.size -1))
+        #first values of the ts identical with the first col?
+        assert ts_hourly[0] == annual.ix[0].values[0]
         #last values of the ts identical with the last col and last row of the df?
-        assert ts_hourly[-1] == annual.ix[annual.index.size].values[-1]
-        ### index
+        assert ts_hourly[-1] == annual.ix[-1].values[-1]
+        #### index
         ##test if index has the right length
-        assert annual.index[-1] == 8784
+        assert annual.index.size == 8784
         ##test last column: if first value and data are the same as first value of timeseries
         ### leap
         ##test leap offset
@@ -56,7 +56,7 @@ def get_mdh(DatetimeIndex, index):
 
         df96 = annual[1996]
         df96_leap = df96[(df96.index.month == 2) & (df96.index.day == 29)]
-        tm.assert_series_equal(ser96_leap, df96_leap)
+        np.testing.assert_equal(ser96_leap.values, df96_leap.values)
         #non-leap year: 1994 - are all values NaN for day 29.02?
         nan_arr = np.empty(24)
         nan_arr.fill(np.nan)
         df94 = annual[1994]
         df94_noleap = df94[(df94.index.month == 2) & (df94.index.day == 29)]
         np.testing.assert_equal(df94_noleap.values, nan_arr)
@@ -67,19 +67,23 @@ def get_mdh(DatetimeIndex, index):
         ext = pivot.extended_info(annual)
         ## descriptive statistics
         #mean
-        tm.assert_frame_equal(annual.mean(1), ext['mean'])
-        tm.assert_frame_equal(annual.sum(1), ext['sum'])
-        tm.assert_frame_equal(annual.min(1), ext['min'])
-        tm.assert_frame_equal(annual.min(1), ext['max'])
-        tm.assert_frame_equal(annual.std(1), ext['std'])
+        np.testing.assert_equal(annual.mean(1).values, ext['mean'].values)
+        np.testing.assert_equal(annual.sum(1).values, ext['sum'].values)
+        np.testing.assert_equal(annual.min(1).values, ext['min'].values)
+        np.testing.assert_equal(annual.max(1).values, ext['max'].values)
+        np.testing.assert_equal(annual.std(1).values, ext['std'].values)
         ## additional time columns for easier filtering
         np.testing.assert_equal(ext['doy'].values, annual.index.dayofyear)
         np.testing.assert_equal(ext['day'].values, annual.index.day)
         #the hour is incremented by 1
         np.testing.assert_equal(ext['hour'].values, (annual.index.hour +1))
 
 
-
+
+class TestPivotAnnual(unittest.TestCase):
+    """
+    New pandas of scikits.timeseries pivot_annual
+    """
     def test_daily(self):
         rng = date_range('1/1/2000', '12/31/2004', freq='D')
         ts = Series(np.random.randn(len(rng)), index=rng)

From ed0659f3cba74e79a5932201f9c8b266e6d4539f Mon Sep 17 00:00:00 2001
From: TimMi
Date: Wed, 7 Nov 2012 14:20:21 +0100
Subject: [PATCH 4/5] minor restructuring of the code

---
 pandas/tseries/pivot.py | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/pandas/tseries/pivot.py b/pandas/tseries/pivot.py
index 9792ef1f60550..6249fa1fa12b0 100644
--- a/pandas/tseries/pivot.py
+++ b/pandas/tseries/pivot.py
@@ -66,44 +66,43 @@ def pivot_annual_h(series, freq=None, dt_index=False):
         dummy_df = DataFrame(values, index=hoy_leap_list,
                              columns=years)
 
-        ##get offset for leap hours
-
+        ##prepare the index for inserting the values into the result dataframe
+        #get offset for leap hours
         #see:
         #http://stackoverflow.com/questions/2004364/increment-numpy-array-with-repeated-indices
         #1994-02-28 23:00:00 -> index 1415
-        ind_z = np.array(range(0, 8760))
-        ind_i = np.array(range(1416,8760 ))
+        index_nonleap = np.array(range(0, 8760))
+        index_leapshift = np.array(range(1416,8760 ))
 
-        ind_t = ind_z.copy()
-        ind_t[ind_i]+=24
+        index_incl_leap = index_nonleap.copy()
+        #shift index by 24 (hours) for leap
+        index_incl_leap[index_leapshift]+=24
 
-        #TODO: beautify variable names
+        # select data for the respective year
         for year in years:
 
-            # select data for the respective year
-            ser_sel = series[ series.index.year == year]
-            info = (ser_sel).values
-
-
-
+            #select the data for the respective year
+            series_year = series[ series.index.year == year]
+            #create a array with the values for the respecive year
+            values = (series_year).values
 
             if isleapyear(year):
-                dummy_df[year] = info
+                dummy_df[year] = values
             else:
-                data = np.empty((total_hoy_leap), dtype=series.dtype)
-                data.fill(np.nan)
+                #dummy array to be filled with non-leap values
+                dummy_array = np.empty((total_hoy_leap), dtype=series.dtype)
+                dummy_array.fill(np.nan)
 
-                ser_sel = series[ series.index.year == year]
-                info = (ser_sel).values
+                #fill dummy array with values leaving the leap day
+                dummy_array.put(index_incl_leap, values)
 
-                data.put(ind_t, (series[ series.index.year == year]).values)
-
-                dummy_df[year] = data
+                dummy_df[year] = dummy_array
 
         res_df = dummy_df
 
         #assign a datetime index, CAUTION: the year is definatly wrong!
         if dt_index:
-            rng = default_rng()
+            rng = default_rng(freq='H', leap=True)
             res_df = DataFrame(res_df.values, index=rng,
                                columns=res_df.columns)

From a997751552ae2a270caede5ee55054cef99b210d Mon Sep 17 00:00:00 2001
From: TimMi
Date: Wed, 7 Nov 2012 16:28:17 +0100
Subject: [PATCH 5/5] additional minor edits

---
 pandas/tseries/pivot.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tseries/pivot.py b/pandas/tseries/pivot.py
index 6249fa1fa12b0..632e6bdab4324 100644
--- a/pandas/tseries/pivot.py
+++ b/pandas/tseries/pivot.py
@@ -59,10 +59,10 @@ def pivot_annual_h(series, freq=None, dt_index=False):
         hoy_leap_list = range(1, (total_hoy_leap + 1 ))
 
 
-
+        #create a array template
         values = np.empty((total_hoy_leap, len(years)), dtype=series.dtype)
         values.fill(np.nan)
-
+        #create a df to receive the resulting data
         dummy_df = DataFrame(values, index=hoy_leap_list,
                              columns=years)
 
@@ -100,7 +100,7 @@ def pivot_annual_h(series, freq=None, dt_index=False):
 
         res_df = dummy_df
 
-        #assign a datetime index, CAUTION: the year is definatly wrong!
+        #assign a pseudo datetime index , CAUTION: the year is definitely wrong!
         if dt_index:
             rng = default_rng(freq='H', leap=True)
             res_df = DataFrame(res_df.values, index=rng,
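
Taken together, the helpers added above are meant to be used roughly as in the
following minimal sketch, distilled from test_hourly in this series. It assumes
the whole patch series is applied so that pandas.tseries.pivot is importable;
the random input data and variable names are purely illustrative.

import numpy as np
from pandas import Series
from pandas.tseries.index import date_range
from pandas.tseries import pivot

# hourly series covering 1994-1997, i.e. three non-leap years and the
# leap year 1996 (4 * 8760 + 24 hours in total)
rng = date_range('1/1/1994', periods=4 * 8760 + 24, freq='H')
ts = Series(np.random.randn(len(rng)), index=rng)

# one column per year and 8784 rows (366 * 24); the 24 rows that belong
# to Feb. 29th hold NaN in the non-leap year columns
annual = pivot.pivot_annual_h(ts, freq='H', dt_index=True)

# append mean/sum/min/max/std aggregates plus doy/month/day/hour columns
# for easier filtering
ext = pivot.extended_info(annual)

print(annual.shape)     # (8784, 4)
print(ext.columns[:4])  # doy/month/day/hour, moved to the front by last_col2front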