REF: promote parser.pyx to the top level of the pandas package

wesm · wesm · commit f89b491bb4ec · 2013-06-02T13:51:12.000-07:00
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -20,7 +20,7 @@
 
 import pandas.lib as lib
 import pandas.tslib as tslib
-import pandas._parser as _parser
+import pandas.parser as _parser
 from pandas.tseries.period import Period
 import json
 
@@ -752,7 +752,7 @@ def __init__(self, kwds):
                         [ com.is_integer(i) for i in self.index_col ]) or com.is_integer(self.index_col)):
                     raise Exception("index_col must only contain row numbers "
                                     "when specifying a multi-index header")
-                
+
         self._name_processed = False
 
     @property
@@ -783,7 +783,7 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_
 
         # the names are the tuples of the header that are not the index cols
         # 0 is the name of the index, assuming index_col is a list of column
-        # numbers 
+        # numbers
         ic = self.index_col
         if ic is None:
             ic = []
@@ -795,8 +795,8 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_
         orig_header = list(header)
 
         # clean the index_names
-        index_names = header.pop(-1) 
-        (index_names, names, 
+        index_names = header.pop(-1)
+        (index_names, names,
          index_col) = _clean_index_names(index_names, self.index_col)
 
         # extract the columns
@@ -846,7 +846,7 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
             index = self._get_complex_date_index(data, columns)
             index = self._agg_index(index, try_parse_dates=False)
 
-        # add names for the index 
+        # add names for the index
         if indexnamerow:
             coffset = len(indexnamerow) - len(columns)
             index.names = indexnamerow[:coffset]
@@ -1060,7 +1060,7 @@ def __init__(self, src, **kwds):
                 _is_index_col(self.index_col)):
 
                 self._name_processed = True
-                (index_names, self.names, 
+                (index_names, self.names,
                  self.index_col) = _clean_index_names(self.names, self.index_col)
 
                 if self.index_names is None:
@@ -1498,7 +1498,7 @@ def _infer_columns(self):
                             if cur_count > 0:
                                 this_columns[i] = '%s.%d' % (col, cur_count)
                             counts[col] = cur_count + 1
-        
+
                 columns.append(this_columns)
 
             self._clear_buffer()
@@ -1993,5 +1993,3 @@ def __init__(self, path_or_buf, kind=None, **kwds):
         from warnings import warn
         warn("ExcelFile can now be imported from: pandas.io.excel", FutureWarning)
         super(ExcelFile, self).__init__(path_or_buf, kind=kind, **kwds)
-
-  
diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py
@@ -27,8 +27,8 @@
 
 import pandas.util.testing as tm
 
-from pandas._parser import TextReader
-import pandas._parser as parser
+from pandas.parser import TextReader
+import pandas.parser as parser
 
 
 class TestCParser(unittest.TestCase):
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -17,7 +17,10 @@
 from pandas.io.html import _BeautifulSoupHtml5LibFrameParser
 from pandas.io.html import _BeautifulSoupLxmlFrameParser, _remove_whitespace
 from pandas import DataFrame, MultiIndex, read_csv, Timestamp
-from pandas.util.testing import assert_frame_equal, network, get_data_path
+from pandas.util.testing import (assert_frame_equal, network,
+                                 get_data_path)
+from numpy.testing.decorators import slow
+
 from pandas.util.testing import makeCustomDataframe as mkdf
 
 
@@ -93,6 +96,7 @@ def run_read_html(self, *args, **kwargs):
         return _run_read_html(parser, *args, **kwargs)
 
     @network
+    @slow
     def test_banklist_url(self):
         url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'
         df1 = self.run_read_html(url, 'First Federal Bank of Florida',
@@ -102,6 +106,7 @@ def test_banklist_url(self):
         assert_framelist_equal(df1, df2)
 
     @network
+    @slow
     def test_spam_url(self):
         url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
                'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
@@ -361,6 +366,7 @@ def test_multiple_matches(self):
         self.assertGreater(len(dfs), 1)
 
     @network
+    @slow
     def test_pythonxy_plugins_table(self):
         url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
         dfs = self.run_read_html(url, match='Python',
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -34,7 +34,7 @@
 
 from numpy.testing.decorators import slow
 
-from pandas._parser import OverflowError
+from pandas.parser import OverflowError
 
 
 class ParserTests(object):
@@ -536,7 +536,7 @@ def test_nat_parse(self):
 
         # GH 3062
         df = DataFrame(dict({
-                    'A' : np.asarray(range(10),dtype='float64'), 
+                    'A' : np.asarray(range(10),dtype='float64'),
                     'B' : pd.Timestamp('20010101') }))
         df.iloc[3:6,:] = np.nan
 
@@ -1025,19 +1025,19 @@ def test_header_multi_index(self):
         #### invalid options ####
 
         # no as_recarray
-        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
                           index_col=[0,1], as_recarray=True, tupleize_cols=False)
 
         # names
-        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
                           index_col=[0,1], names=['foo','bar'], tupleize_cols=False)
         # usecols
-        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
                           index_col=[0,1], usecols=['foo','bar'], tupleize_cols=False)
         # non-numeric index_col
-        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
                           index_col=['foo','bar'], tupleize_cols=False)
-        
+
     def test_pass_names_with_index(self):
         lines = self.data1.split('\n')
         no_header = '\n'.join(lines[1:])
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
diff --git a/setup.py b/setup.py
@@ -286,7 +286,7 @@ class CheckSDist(sdist):
                  'pandas/tslib.pyx',
                  'pandas/index.pyx',
                  'pandas/algos.pyx',
-                 'pandas/src/parser.pyx',
+                 'pandas/parser.pyx',
                  'pandas/src/sparse.pyx']
 
     def initialize_options(self):
@@ -412,6 +412,12 @@ def pxd(name):
                        'pandas/src/datetime/np_datetime_strings.c']},
     algos={'pyxfile': 'algos',
            'depends': [srcpath('generated', suffix='.pyx')]},
+    parser=dict(pyxfile='parser',
+                depends=['pandas/src/parser/tokenizer.h',
+                         'pandas/src/parser/io.h',
+                         'pandas/src/numpy_helper.h'],
+                sources=['pandas/src/parser/tokenizer.c',
+                         'pandas/src/parser/io.c'])
 )
 
 extensions = []
@@ -440,16 +446,6 @@ def pxd(name):
                        libraries=libraries)
 
 
-parser_ext = Extension('pandas._parser',
-                       depends=['pandas/src/parser/tokenizer.h',
-                                'pandas/src/parser/io.h',
-                                'pandas/src/numpy_helper.h'],
-                       sources=[srcpath('parser', suffix=suffix),
-                                'pandas/src/parser/tokenizer.c',
-                                'pandas/src/parser/io.c',
-                                ],
-                       include_dirs=common_include)
-
 sandbox_ext = Extension('pandas._sandbox',
                         sources=[srcpath('sandbox', suffix=suffix)],
                         include_dirs=common_include)
@@ -460,7 +456,7 @@ def pxd(name):
                            sources=[srcpath('cppsandbox', suffix=suffix)],
                            include_dirs=[])
 
-extensions.extend([sparse_ext, parser_ext])
+extensions.extend([sparse_ext])
 
 # if not ISRELEASED:
 #     extensions.extend([sandbox_ext])