BUG: handle columns argument in DataFrame.to_html, use statsmodels.api to avoid deprecation warnings, close #890

wesm · wesm · commit edc90b3d8f8d · 2012-03-11T12:26:54.000-04:00
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -253,13 +253,13 @@ def write_tr(buf, l, indent=0, indent_delta=4, header=False):
         def _column_header():
             row = [''] * (frame.index.nlevels - 1)
 
-            if isinstance(frame.columns, MultiIndex):
+            if isinstance(self.columns, MultiIndex):
                 if self.has_column_names:
-                    row.append(single_column_table(frame.columns.names))
-                row.extend([single_column_table(c) for c in frame.columns])
+                    row.append(single_column_table(self.columns.names))
+                row.extend([single_column_table(c) for c in self.columns])
             else:
-                row.append(frame.columns.name or '')
-                row.extend(frame.columns)
+                row.append(self.columns.name or '')
+                row.extend(self.columns)
             return row
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -282,7 +282,7 @@ def _column_header():
                 indent += indent_delta
                 write_tr(buf, col_row, indent, indent_delta, header=True)
                 if self.has_index_names:
-                    row = frame.index.names + [''] * len(frame.columns)
+                    row = frame.index.names + [''] * len(self.columns)
                     write_tr(buf, row, indent, indent_delta, header=True)
 
                 write(buf, '</thead>', indent)
@@ -299,7 +299,7 @@ def _maybe_bold_row(x):
                     return x
 
             fmt_values = {}
-            for col in frame.columns:
+            for col in self.columns:
                 fmt_values[col] = self._format_col(col)
 
             # write values
@@ -309,7 +309,7 @@ def _maybe_bold_row(x):
                     row.extend(_maybe_bold_row(frame.index[i]))
                 else:
                     row.append(_maybe_bold_row(frame.index[i]))
-                for col in frame.columns:
+                for col in self.columns:
                     row.append(fmt_values[col][i])
                 write_tr(buf, row, indent, indent_delta)
             indent -= indent_delta
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3931,7 +3931,8 @@ def _bar_plot(self, axes, subplots=False, use_index=True, grid=True,
         if legend and not subplots:
             fig = ax.get_figure()
             fig.legend([r[0] for r in rects], labels, loc='upper center',
-                       fancybox=True, ncol=6, mode='expand')
+                       fancybox=True, ncol=6)
+                       #mode='expand')
 
         import matplotlib.pyplot as plt
         plt.subplots_adjust(top=0.8)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -833,8 +833,6 @@ def _convert_grouper(axis, grouper):
 
 class SeriesGroupBy(GroupBy):
 
-    _cythonized_methods = set(['add', 'mean'])
-
     def aggregate(self, func_or_funcs, *args, **kwargs):
         """
         Apply aggregation function or functions to groups, yielding most likely
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2121,6 +2121,21 @@ def plot(self, label=None, kind='line', use_index=True, rot=30, ax=None,
             ax.set_xticklabels([gfx._stringify(key) for key in self.index],
                                rotation=rot,
                                fontsize=fontsize)
+        elif kind == 'barh':
+            yinds = np.arange(N) + 0.25
+            ax.barh(yinds, self.values.astype(float), 0.5,
+                    left=np.zeros(N), linewidth=1, **kwds)
+
+            if N < 10:
+                fontsize = 12
+            else:
+                fontsize = 10
+
+            ax.set_yticks(yinds + 0.25)
+            ax.set_yticklabels([gfx._stringify(key) for key in self.index],
+                               rotation=rot,
+                               fontsize=fontsize)
+
         ax.grid(grid)
         plt.draw_if_interactive()
 
diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py
@@ -38,7 +38,11 @@ class OLS(object):
 
     def __init__(self, y, x, intercept=True, weights=None, nw_lags=None,
                  nw_overlap=False):
-        import scikits.statsmodels.api as sm
+        try:
+            import statsmodels.api as sm
+        except ImportError:
+            import scikits.statsmodels.api as sm
+
         self._x_orig = x
         self._y_orig = y
         self._weights_orig = weights
diff --git a/pandas/stats/tests/common.py b/pandas/stats/tests/common.py
@@ -38,10 +38,14 @@ def check_for_scipy():
         raise nose.SkipTest('no scipy')
 
 def check_for_statsmodels():
+    _have_statsmodels = True
     try:
-        import scikits.statsmodels as sm
-    except Exception:
-        raise nose.SkipTest('no statsmodels')
+        import statsmodels.api as sm
+    except ImportError:
+        try:
+            import scikits.statsmodels.api as sm
+        except ImportError:
+            raise nose.SkipTest('no statsmodels')
 
 
 class BaseTest(unittest.TestCase):
diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py
@@ -8,6 +8,7 @@
 
 from datetime import datetime
 import unittest
+import nose
 import numpy as np
 
 from pandas.core.panel import Panel
@@ -21,10 +22,14 @@
 
 from common import BaseTest
 
+_have_statsmodels = True
 try:
-    import scikits.statsmodels.api as sm
+    import statsmodels.api as sm
 except ImportError:
-    pass
+    try:
+        import scikits.statsmodels.api as sm
+    except ImportError:
+        _have_statsmodels = False
 
 def _check_repr(obj):
     repr(obj)
@@ -60,10 +65,7 @@ def setUpClass(cls):
         except ImportError:
             pass
 
-        try:
-            import scikits.statsmodels.api as _
-        except ImportError:
-            import nose
+        if not _have_statsmodels:
             raise nose.SkipTest
 
     def testOLSWithDatasets(self):
@@ -149,8 +151,7 @@ def checkOLS(self, exog, endog, x, y):
         _check_non_raw_results(result)
 
     def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
-        from scikits.statsmodels.tools.tools import rank
-        window = rank(x.values) * 2
+        window = sm.tools.tools.rank(x.values) * 2
 
         moving = ols(y=y, x=x, weights=weights, window_type=window_type,
                      window=window, **kwds)
@@ -232,10 +233,7 @@ class TestOLSMisc(unittest.TestCase):
     '''
     @classmethod
     def setupClass(cls):
-        try:
-            import scikits.statsmodels.api as _
-        except ImportError:
-            import nose
+        if not _have_statsmodels:
             raise nose.SkipTest
 
     def test_f_test(self):
diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py
@@ -7,8 +7,13 @@
 
 raise nose.SkipTest('skipping this for now')
 
-import scikits.statsmodels.tsa.var as sm_var
-import scikits.statsmodels as sm
+try:
+    import statsmodels.tsa.var as sm_var
+    import statsmodels as sm
+except ImportError:
+    import scikits.statsmodels.tsa.var as sm_var
+    import scikits.statsmodels as sm
+
 
 import pandas.stats.var as _pvar
 reload(_pvar)
diff --git a/pandas/stats/var.py b/pandas/stats/var.py
@@ -23,7 +23,10 @@ class VAR(object):
     """
 
     def __init__(self, data, p=1, intercept=True):
-        import scikits.statsmodels.tsa.var as sm_var
+        try:
+            import statsmodels.tsa.var as sm_var
+        except ImportError:
+            import scikits.statsmodels.tsa.var as sm_var
 
         self._data = DataFrame(_combine_rhs(data))
         self._p = p
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -353,6 +353,10 @@ def test_to_html_with_no_bold(self):
         ashtml = x.to_html(bold_rows=False)
         assert('<strong>' not in ashtml)
 
+    def test_to_html_columns_arg(self):
+        result = self.frame.to_html(columns=['A'])
+        self.assert_('<th>B</th>' not in result)
+
     def test_repr_html(self):
         self.frame._repr_html_()
 
diff --git a/scripts/file_sizes.py b/scripts/file_sizes.py
@@ -11,12 +11,10 @@
 names = []
 lengths = []
 
-# if len(sys.argv) > 1:
-#     loc = sys.argv[1]
-# else:
-#     loc = '.'
-
-loc = 'pandas'
+if len(sys.argv) > 1:
+    loc = sys.argv[1]
+else:
+    loc = '.'
 walked = os.walk(loc)
 
 def _should_count_file(path):
@@ -192,9 +190,9 @@ def doit2():
 ax.hist(all_counts, bins=100)
 n = len(all_counts)
 nmore = (all_counts > 50).sum()
-ax.set_title('%s function lengths, n=%d' % (loc, n))
+ax.set_title('%s function lengths, n=%d' % ('pandas', n))
 ax.set_ylabel('N functions')
 ax.set_xlabel('Function length')
-ax.text(60, 200, '%.3f%% with > 50 lines' % ((n - nmore) / float(n)),
+ax.text(100, 300, '%.3f%% with > 50 lines' % ((n - nmore) / float(n)),
         fontsize=18)
 plt.show()
diff --git a/scripts/git_code_churn.py b/scripts/git_code_churn.py
@@ -8,92 +8,8 @@
 
 from pandas import *
 
-repo_path = '/home/wesm/code/pandas'
-githist = ('git log --pretty=format:\"%h %ad | %s%d [%an]\" --date=short ' +
-           repo_path + ' > githist.txt')
-
-def rungithist():
-    os.system(githist)
-
-def get_commit_history():
-    # return TimeSeries
-
-    rungithist()
-
-    githist = open('githist.txt').read()
-    os.remove('githist.txt')
-
-    sha_date = []
-    for line in githist.split('\n'):
-        sha_date.append(line.split()[:2])
-
-    shas, dates = zip(*sha_date)
-
-    hists = dict(zip(shas, githist.split('\n')))
-
-    dates = [parser.parse(d) for d in dates]
-
-    return Series(dates, shas), hists
-
-def get_commit_churn(sha, prev_sha):
-    stdout = subprocess.Popen(['git', 'diff', sha, prev_sha, '--numstat'],
-                              stdout=subprocess.PIPE).stdout
-
-    stdout = stdout.read()
-
-    insertions = {}
-    deletions = {}
-
-    for line in stdout.split('\n'):
-        try:
-            i, d, path = line.split('\t')
-            insertions[path] = int(i)
-            deletions[path] = int(d)
-        except: # EAFP
-            pass
-
-    # statline = stdout.split('\n')[-2]
-
-    # match = re.match('.*\s(.*)\sinsertions.*\s(.*)\sdeletions', statline)
-
-    # insertions = int(match.group(1))
-    # deletions = int(match.group(2))
-
-    return insertions, deletions
-
-def get_code_churn(commits):
-    shas = commits.index[::-1]
-
-    prev = shas[0]
-
-    insertions = [np.nan]
-    deletions = [np.nan]
-
-    insertions = {}
-    deletions = {}
-
-    for cur in shas[1:]:
-        i, d = get_commit_churn(cur, prev)
-
-        insertions[cur] = i
-        deletions[cur] = d
-
-        # insertions.append(i)
-        # deletions.append(d)
-
-        prev = cur
-
-    return Panel({'insertions' : DataFrame(insertions),
-                  'deletions' : DataFrame(deletions)}, minor_axis=shas)
-
-
-    # return DataFrame({'insertions' : insertions,
-    #                   'deletions' : deletions}, index=shas)
 
 if __name__ == '__main__':
-    # commits, hists = get_commit_history()
-    # churn = get_code_churn(commits)
-
     from vbench.git import GitRepo
     repo = GitRepo('/Users/wesm/code/pandas')
     churn = repo.get_churn_by_file()