BUG: Add check for array lengths in from_arrays method (GH13599) #13633

Closed
Changes from all commits (51 commits)
ff5ef13
BUG: Add check for array lengths in from_arrays method (GH13599)
sahildua2305 Jul 12, 2016
db98e32
BUG: Add test for array length mismatch
sahildua2305 Jul 13, 2016
5e7bd92
BUG: Fix minor issue with new test for from_arrays
sahildua2305 Jul 13, 2016
27d2915
CLN: Fix compile time warnings
yui-knk Jul 13, 2016
06103dd
Pin IPython for doc build to 4.x (see #13639)
jorisvandenbossche Jul 13, 2016
7dd4091
CLN: reorg type inference & introspection
jreback Jul 13, 2016
20de266
BLD: included pandas.api.* in setup.py (#13640)
gfyoung Jul 13, 2016
4a9e66e
Minor fix for linter
sahildua2305 Jul 13, 2016
94b829d
Update whatsnew entry
sahildua2305 Jul 13, 2016
44f3229
DOC/BLD: pin IPython version to 4.2.0 (#13639) (#13647)
jorisvandenbossche Jul 14, 2016
6f0a020
TST: reorganize tools.tests (#13619)
sinhrks Jul 14, 2016
a711b42
BF(TST): allow AttributeError being raised (in addition to TypeError)…
yarikoptic Jul 14, 2016
084ceae
API, DEPR: Raise and Deprecate Reshape for Pandas Objects
gfyoung Jul 14, 2016
3f6d4bd
CLN: Fix compile time warnings
yui-knk Jul 14, 2016
c9a27ed
CLN: fix some issues in asv benchmark suite (#13630)
jorisvandenbossche Jul 14, 2016
05b976c
TST: add tests for Timestamp.toordinal/fromordinal
sinhrks Jul 15, 2016
71a0675
CLN: Initialization coincides with mapping, hence with uniqueness check
toobaz Jul 15, 2016
0a70b5f
API: Change Period('NAT') to return NaT
sinhrks Jul 15, 2016
1bee56e
BUG: construction of Series with integers on windows not default to i…
jreback Jul 15, 2016
d7c028d
CLN: Removed levels attribute from Categorical
gfyoung Jul 15, 2016
91691de
Fix minor typo
sahildua2305 Jul 16, 2016
043879f
DOC: Add reference of DataFrame.rename_axis and Series.rename_axis to…
shawnheide Jul 17, 2016
76d7e77
DOC: correct template for .cum* descriptions (#13683)
shawnheide Jul 17, 2016
ada6bf3
DOC: fix a keyword coerce in array_to_timedelta64 (#13686)
yui-knk Jul 17, 2016
6b9cd15
TST: assert message shows unnecessary diff (#13676)
sinhrks Jul 18, 2016
694fe61
ENH: Series.append now has ignore_index kw
sinhrks Jul 19, 2016
5a52171
BUG: Add type check for width parameter in str.pad method GH13598
wcwagner Jul 19, 2016
9f635cd
BUG: Cast a key to NaT before get loc from Index
yui-knk Jul 19, 2016
b054536
BUG: merge_asof not handling allow_exact_matches and tolerance on fir…
jreback Jul 19, 2016
361a2b4
CLN: removed pandas.sandbox
gfyoung Jul 19, 2016
1e1e9b3
DEPR: Remove legacy offsets
sinhrks Jul 19, 2016
006bd0b
CLN: removed setter method of categorical's ordered attribute
gfyoung Jul 19, 2016
b225cac
BUG/PERF: Sort mixed-int in Py3, fix Index.difference
pijucha Jul 19, 2016
fafef5d
ENH: Add support for writing variable labels to Stata files
bashtage Jul 19, 2016
506520b
API: Index doesn't results in PeriodIndex if Period contains NaT
sinhrks Jul 19, 2016
31c2e5f
PERF: improve DTI string parse
sinhrks Jul 19, 2016
4c9ae94
DOC: resample warnings
chris-b1 Jul 19, 2016
8acfad3
CLN: Removed the flavor='mysql' option and deprecate flavor in DataFr…
gfyoung Jul 19, 2016
786edc7
ENH: add time-window capability to .rolling
jreback Jul 20, 2016
57b373c
CLN: Remove a test case about Timestamp to TestTimestamp (#13722)
yui-knk Jul 20, 2016
b25a2a1
DOC/DEPR: pivot_annual
sinhrks Jul 20, 2016
016b352
PERF: Improve Period hashing
sinhrks Jul 20, 2016
4962131
MAINT: Removed some warnings in tests
gfyoung Jul 20, 2016
634e95d
CLN: removed the 'diff' method for Index
gfyoung Jul 20, 2016
a2e1917
BUG: Add check for array lengths in from_arrays method (GH13599)
sahildua2305 Jul 12, 2016
e401cf1
BUG: Add test for array length mismatch
sahildua2305 Jul 13, 2016
93296ff
BUG: Fix minor issue with new test for from_arrays
sahildua2305 Jul 13, 2016
72fb52d
Minor fix for linter
sahildua2305 Jul 13, 2016
57d5250
Update whatsnew entry
sahildua2305 Jul 13, 2016
bb6a952
Fix minor typo
sahildua2305 Jul 16, 2016
6ff0ce1
Remove minor typo
sahildua2305 Jul 20, 2016
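The core change in this PR (GH13599) adds a length check to `MultiIndex.from_arrays`, which previously accepted arrays of unequal length silently. A minimal pure-Python sketch of that validation — the helper name is illustrative, not the actual pandas internal:

```python
def validate_equal_lengths(arrays):
    # Sketch of the check this PR adds to MultiIndex.from_arrays:
    # every input array must have the same length, otherwise raise.
    lengths = {len(arr) for arr in arrays}
    if len(lengths) > 1:
        raise ValueError('all arrays must be same length')
    return lengths.pop() if lengths else 0
```

With equal-length inputs the common length is returned; a mismatch raises the `ValueError` the PR's new test asserts on.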
6 changes: 3 additions & 3 deletions asv_bench/asv.conf.json
@@ -77,11 +77,11 @@
// On conda install pytables, otherwise tables
{"environment_type": "conda", "tables": ""},
{"environment_type": "conda", "pytables": null},
{"environment_type": "virtualenv", "tables": null},
{"environment_type": "virtualenv", "pytables": ""},
{"environment_type": "(?!conda).*", "tables": null},
{"environment_type": "(?!conda).*", "pytables": ""},
// On conda&win32, install libpython
{"sys_platform": "(?!win32).*", "libpython": ""},
{"sys_platform": "win32", "libpython": null},
{"environment_type": "conda", "sys_platform": "win32", "libpython": null},
{"environment_type": "(?!conda).*", "libpython": ""}
],
"include": [],
55 changes: 55 additions & 0 deletions asv_bench/benchmarks/index_object.py
@@ -63,6 +63,27 @@ def time_index_datetime_union(self):
self.rng.union(self.rng2)


class index_datetime_set_difference(object):
goal_time = 0.2

def setup(self):
self.N = 100000
self.A = self.N - 20000
self.B = self.N + 20000
self.idx1 = DatetimeIndex(range(self.N))
self.idx2 = DatetimeIndex(range(self.A, self.B))
self.idx3 = DatetimeIndex(range(self.N, self.B))

def time_index_datetime_difference(self):
self.idx1.difference(self.idx2)

def time_index_datetime_difference_disjoint(self):
self.idx1.difference(self.idx3)

def time_index_datetime_symmetric_difference(self):
self.idx1.symmetric_difference(self.idx2)
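The operations benchmarked here follow plain set semantics. A pure-Python sketch of what `Index.difference` and `Index.symmetric_difference` compute (the real methods return `Index` objects and have their own sorting and dtype behavior this ignores):

```python
def index_difference(left, right):
    # Elements of `left` not present in `right` (sketch of Index.difference).
    right_set = set(right)
    return sorted(x for x in left if x not in right_set)

def index_symmetric_difference(left, right):
    # Elements appearing in exactly one of the two inputs
    # (sketch of Index.symmetric_difference).
    return sorted(set(left) ^ set(right))
```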


class index_float64_boolean_indexer(object):
goal_time = 0.2

@@ -183,6 +204,40 @@ def time_index_int64_union(self):
self.left.union(self.right)


class index_int64_set_difference(object):
goal_time = 0.2

def setup(self):
self.N = 500000
self.options = np.arange(self.N)
self.left = Index(self.options.take(
np.random.permutation(self.N)[:(self.N // 2)]))
self.right = Index(self.options.take(
np.random.permutation(self.N)[:(self.N // 2)]))

def time_index_int64_difference(self):
self.left.difference(self.right)

def time_index_int64_symmetric_difference(self):
self.left.symmetric_difference(self.right)


class index_str_set_difference(object):
goal_time = 0.2

def setup(self):
self.N = 10000
self.strs = tm.rands_array(10, self.N)
self.left = Index(self.strs[:self.N * 2 // 3])
self.right = Index(self.strs[self.N // 3:])

def time_str_difference(self):
self.left.difference(self.right)

def time_str_symmetric_difference(self):
self.left.symmetric_difference(self.right)


class index_str_boolean_indexer(object):
goal_time = 0.2

20 changes: 0 additions & 20 deletions asv_bench/benchmarks/indexing.py
@@ -19,24 +19,6 @@ def time_dataframe_getitem_scalar(self):
self.df[self.col][self.idx]


class datamatrix_getitem_scalar(object):
goal_time = 0.2

def setup(self):
try:
self.klass = DataMatrix
except:
self.klass = DataFrame
self.index = tm.makeStringIndex(1000)
self.columns = tm.makeStringIndex(30)
self.df = self.klass(np.random.rand(1000, 30), index=self.index, columns=self.columns)
self.idx = self.index[100]
self.col = self.columns[10]

def time_datamatrix_getitem_scalar(self):
self.df[self.col][self.idx]


class series_get_value(object):
goal_time = 0.2

@@ -498,5 +480,3 @@ def setup(self):

def time_float_loc(self):
self.ind.get_loc(0)


10 changes: 5 additions & 5 deletions asv_bench/benchmarks/inference.py
@@ -143,12 +143,12 @@ class to_numeric(object):

param_names = ['data', 'downcast']
params = [
[(['1'] * N / 2) + ([2] * N / 2),
(['-1'] * N / 2) + ([2] * N / 2),
np.repeat(np.array('1970-01-01', '1970-01-02',
[(['1'] * (N / 2)) + ([2] * (N / 2)),
(['-1'] * (N / 2)) + ([2] * (N / 2)),
np.repeat(np.array(['1970-01-01', '1970-01-02'],
dtype='datetime64[D]'), N),
(['1.1'] * N / 2) + ([2] * N / 2),
([1] * N / 2) + ([2] * N / 2),
(['1.1'] * (N / 2)) + ([2] * (N / 2)),
([1] * (N / 2)) + ([2] * (N / 2)),
np.repeat(np.int32(1), N)],
[None, 'integer', 'signed', 'unsigned', 'float'],
]
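The `inference.py` fix above is pure operator precedence: `*` (list repetition) and `/` share precedence and associate left-to-right, so the unparenthesized form divides a list. A small demonstration (integer division shown, since the benchmark's `N / 2` is Python 2-era):

```python
N = 10

# Unparenthesized, ['1'] * N / 2 parses as (['1'] * N) / 2,
# which tries to divide a list by an int and raises TypeError.
try:
    bad = ['1'] * N / 2
except TypeError:
    bad = None

# The fix parenthesizes the repetition count instead.
good = ['1'] * (N // 2)
```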
16 changes: 0 additions & 16 deletions asv_bench/benchmarks/join_merge.py
@@ -179,10 +179,6 @@ def setup(self):
self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D'])
except:
pass
try:
self.DataFrame = DataMatrix
except:
pass
self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, })
self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D'])
self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D'])
@@ -210,10 +206,6 @@ def setup(self):
self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D'])
except:
pass
try:
self.DataFrame = DataMatrix
except:
pass
self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, })
self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D'])
self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D'])
@@ -241,10 +233,6 @@ def setup(self):
self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D'])
except:
pass
try:
self.DataFrame = DataMatrix
except:
pass
self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, })
self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D'])
self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D'])
@@ -272,10 +260,6 @@ def setup(self):
self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D'])
except:
pass
try:
self.DataFrame = DataMatrix
except:
pass
self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, })
self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D'])
self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D'])
26 changes: 25 additions & 1 deletion asv_bench/benchmarks/period.py
@@ -1,4 +1,4 @@
from pandas import PeriodIndex, date_range
from pandas import Series, Period, PeriodIndex, date_range


class create_period_index_from_date_range(object):
@@ -7,3 +7,27 @@ class create_period_index_from_date_range(object):
def time_period_index(self):
# Simulate irregular PeriodIndex
PeriodIndex(date_range('1985', periods=1000).to_pydatetime(), freq='D')


class period_algorithm(object):
goal_time = 0.2

def setup(self):
data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'),
Period('2011-03', freq='M'), Period('2011-04', freq='M')]
self.s = Series(data * 1000)
self.i = PeriodIndex(data, freq='M')

def time_period_series_drop_duplicates(self):
self.s.drop_duplicates()

def time_period_index_drop_duplicates(self):
self.i.drop_duplicates()

def time_period_series_value_counts(self):
self.s.value_counts()

def time_period_index_value_counts(self):
self.i.value_counts()
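Both Period algorithms benchmarked here reduce to hash-based bookkeeping once `Period` objects hash cheaply (the point of the companion "Improve Period hashing" commit). A pure-Python sketch of their semantics — not the pandas implementation:

```python
from collections import Counter

def drop_duplicates(values):
    # Keep the first occurrence of each value, preserving input order
    # (sketch of Series/Index.drop_duplicates default behavior).
    seen = set()
    out = []
    for v in values:
        if v not in seen:
            seen.add(v)
            out.append(v)
    return out

def value_counts(values):
    # Frequency of each distinct value, most common first
    # (sketch of value_counts).
    return Counter(values).most_common()
```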


16 changes: 14 additions & 2 deletions ci/lint.sh
@@ -8,7 +8,7 @@ RET=0

if [ "$LINT" ]; then
echo "Linting"
for path in 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util'
for path in 'api' 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util'
do
echo "linting -> pandas/$path"
flake8 pandas/$path --filename '*.py'
@@ -17,7 +17,19 @@ if [ "$LINT" ]; then
fi

done
echo "Linting DONE"
echo "Linting *.py DONE"

echo "Linting *.pyx"
for path in 'window.pyx'
do
echo "linting -> pandas/$path"
flake8 pandas/$path --filename '*.pyx' --select=E501,E302,E203,E226,E111,E114,E221,E303,E128,E231,E126,E128
if [ $? -ne "0" ]; then
RET=1
fi

done
echo "Linting *.pyx DONE"

echo "Check for invalid testing"
grep -r -E --include '*.py' --exclude nosetester.py --exclude testing.py '(numpy|np)\.testing' pandas
2 changes: 1 addition & 1 deletion ci/requirements-2.7_DOC_BUILD.run
@@ -1,4 +1,4 @@
ipython
ipython=4.2.0
ipykernel
sphinx
nbconvert
2 changes: 2 additions & 0 deletions doc/source/api.rst
@@ -380,6 +380,7 @@ Reindexing / Selection / Label manipulation
Series.reindex
Series.reindex_like
Series.rename
Series.rename_axis
Series.reset_index
Series.sample
Series.select
@@ -889,6 +890,7 @@ Reindexing / Selection / Label manipulation
DataFrame.reindex_axis
DataFrame.reindex_like
DataFrame.rename
DataFrame.rename_axis
DataFrame.reset_index
DataFrame.sample
DataFrame.select
85 changes: 85 additions & 0 deletions doc/source/computation.rst
@@ -391,6 +391,91 @@ For some windowing functions, additional parameters must be specified:
such that the weights are normalized with respect to each other. Weights
of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result.

.. _stats.moments.ts:

Time-aware Rolling
~~~~~~~~~~~~~~~~~~

.. versionadded:: 0.19.0

New in version 0.19.0 is the ability to pass an offset (or convertible) to a ``.rolling()`` method and have it produce
variable sized windows based on the passed time window. For each time point, this includes all preceding values occurring
within the indicated time delta.

This can be particularly useful for a non-regular time frequency index.

.. ipython:: python

dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
index=pd.date_range('20130101 09:00:00', periods=5, freq='s'))
dft

This is a regular frequency index. Using an integer window parameter rolls over a fixed number of observations.

.. ipython:: python

dft.rolling(2).sum()
dft.rolling(2, min_periods=1).sum()

Specifying an offset allows a more intuitive specification of the rolling frequency.

.. ipython:: python

dft.rolling('2s').sum()

Using a non-regular, but still monotonic index, rolling with an integer window does not impart any special calculation.


.. ipython:: python


dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
index = pd.Index([pd.Timestamp('20130101 09:00:00'),
pd.Timestamp('20130101 09:00:02'),
pd.Timestamp('20130101 09:00:03'),
pd.Timestamp('20130101 09:00:05'),
pd.Timestamp('20130101 09:00:06')],
name='foo'))

dft
dft.rolling(2).sum()


Using the time-specification generates variable windows for this sparse data.

.. ipython:: python

dft.rolling('2s').sum()

Furthermore, we now allow an optional ``on`` parameter to specify a column (rather than the
default of the index) in a DataFrame.

.. ipython:: python

dft = dft.reset_index()
dft
dft.rolling('2s', on='foo').sum()
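The variable-window behavior documented above can be sketched in pure Python with timestamps as plain numbers (seconds); this is a naive O(n²) illustration, not the pandas implementation, which works on a DatetimeIndex and handles ``min_periods`` and NaN:

```python
def time_rolling_sum(times, values, window):
    # For each point t, sum the values whose timestamp lies in
    # (t - window, t] -- the backwards-looking window of .rolling('2s'),
    # inclusive of the end-point but not the start-point.
    out = []
    for t in times:
        out.append(sum(v for tj, v in zip(times, values)
                       if t - window < tj <= t))
    return out
```

For the sparse index in the example (seconds 0, 2, 3, 5, 6), a 2-second window sums at most two neighboring observations per point.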

.. _stats.moments.ts-versus-resampling:

Time-aware Rolling vs. Resampling
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Using ``.rolling()`` with a time-based index is quite similar to :ref:`resampling <timeseries.resampling>`. Both
operate on time-indexed pandas objects and perform reductive operations.

When using ``.rolling()`` with an offset, the offset is a time-delta. For each point in the input, take a
backwards-in-time looking window and aggregate all of the values in that window (including the end-point, but not the
start-point); this is the new value at that point in the result. The windows are variable sized in time-space, and the
result has the same size as the input.

When using ``.resample()`` with an offset, construct a new index at the frequency of the offset. For each frequency
bin, aggregate points from the input that fall within a backwards-in-time looking window ending at that bin; the result
of the aggregation is the output for that frequency point. The windows are fixed size in the frequency space, and the
result has the shape of a regular frequency between the min and the max of the original input object.

To summarize, ``.rolling()`` is a time-based window operation, while ``.resample()`` is a frequency-based window operation.
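For contrast with the rolling case, a minimal sketch of the fixed-bin bucketing ``.resample()`` performs, again with numeric timestamps (left-closed bins on a regular grid; real resampling has many more options):

```python
def resample_sum(times, values, freq):
    # Bucket each timestamp into a fixed-width bin of size `freq` and
    # sum per bin; bins span the regular grid between the min and max
    # of the input, so empty bins still appear (with a zero here,
    # where pandas would produce NaN).
    start = min(times) // freq * freq
    stop = max(times) // freq * freq
    bins = {b: 0 for b in range(start, stop + freq, freq)}
    for t, v in zip(times, values):
        bins[t // freq * freq] += v
    return bins
```

Unlike the rolling sketch, the output index here is the regular bin grid, not the input timestamps.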

Centering Windows
~~~~~~~~~~~~~~~~~

13 changes: 13 additions & 0 deletions doc/source/cookbook.rst
@@ -679,6 +679,19 @@ The :ref:`Pivot <reshaping.pivot>` docs.
'Employed' : lambda x : sum(x),
'Grade' : lambda x : sum(x) / len(x)})

`Plot pandas DataFrame with year over year data
<http://stackoverflow.com/questions/30379789/plot-pandas-data-frame-with-year-over-year-data>`__

To create year and month crosstabulation:

.. ipython:: python

df = pd.DataFrame({'value': np.random.randn(36)},
index=pd.date_range('2011-01-01', freq='M', periods=36))

pd.pivot_table(df, index=df.index.month, columns=df.index.year,
values='value', aggfunc='sum')
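The month-by-year crosstabulation above is, underneath, a grouped sum keyed on (row, column) pairs. A pure-Python sketch of that core (illustrative only; `pd.pivot_table` additionally lays the result out as a 2-D frame):

```python
from collections import defaultdict

def crosstab_sum(rows, cols, values):
    # Sum `values` grouped by (row_key, col_key) -- the aggregation
    # behind a pivot_table with aggfunc='sum'.
    table = defaultdict(float)
    for r, c, v in zip(rows, cols, values):
        table[(r, c)] += v
    return dict(table)
```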

Apply
*****

Expand Down
6 changes: 6 additions & 0 deletions doc/source/ecosystem.rst
@@ -93,6 +93,12 @@ targets the IPython Notebook environment.

`Plotly’s <https://plot.ly/>`__ `Python API <https://plot.ly/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <http://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://plot.ly/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plot.ly/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud <https://plot.ly/product/plans/>`__, `offline <https://plot.ly/python/offline/>`__, or `on-premise <https://plot.ly/product/enterprise/>`__ accounts for private use.

`Pandas-Qt <https://github.com/datalyze-solutions/pandas-qt>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Spun off from the main pandas library, the `Pandas-Qt <https://github.com/datalyze-solutions/pandas-qt>`__
library enables DataFrame visualization and manipulation in PyQt4 and PySide applications.

.. _ecosystem.ide:

IDE