
Commit 5c78ecb

TST: test suite passes!
1 parent 45e478a commit 5c78ecb

21 files changed, +156 -98 lines changed

pandas/algos.pyx

+35

@@ -1708,6 +1708,41 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
 #----------------------------------------------------------------------
 # group operations
 
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_lexsorted(list list_of_arrays):
+    cdef:
+        int i
+        Py_ssize_t n, nlevels
+        int64_t k, cur, pre
+        ndarray arr
+
+    nlevels = len(list_of_arrays)
+    n = len(list_of_arrays[0])
+
+    cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*))
+    for i from 0 <= i < nlevels:
+        # vecs[i] = <int64_t *> (<ndarray> list_of_arrays[i]).data
+
+        arr = list_of_arrays[i]
+        vecs[i] = <int64_t *> arr.data
+    # assume uniqueness??
+
+    for i from 1 <= i < n:
+        for k from 0 <= k < nlevels:
+            cur = vecs[k][i]
+            pre = vecs[k][i-1]
+            if cur == pre:
+                continue
+            elif cur > pre:
+                break
+            else:
+                return False
+    free(vecs)
+    return True
+
+
 @cython.boundscheck(False)
 def groupby_indices(ndarray values):
     cdef:
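
The new is_lexsorted checks whether a list of parallel int64 label arrays is sorted lexicographically, comparing each row against the previous one level by level. A minimal usage sketch (assuming the compiled pandas.algos extension from this commit; the arrays are illustrative):

    import numpy as np
    import pandas.algos as _algos

    # Rows read across the levels: (1, 1), (1, 2), (2, 0) -- lexicographically sorted.
    level0 = np.array([1, 1, 2], dtype=np.int64)
    level1 = np.array([1, 2, 0], dtype=np.int64)
    print(_algos.is_lexsorted([level0, level1]))                               # True

    # Rows (1, 2), (1, 1), (2, 0): the second row sorts before the first, so False.
    print(_algos.is_lexsorted([level0, np.array([2, 1, 0], dtype=np.int64)]))  # False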

pandas/core/frame.py

+8 -4

@@ -42,7 +42,10 @@
 import pandas.core.format as fmt
 import pandas.core.generic as generic
 import pandas.core.nanops as nanops
+
 import pandas.lib as lib
+import pandas.tslib as tslib
+import pandas.algos as _algos
 
 from pandas.core.config import get_option
 
@@ -380,7 +383,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             mask = ma.getmaskarray(data)
             datacopy = ma.copy(data)
             if issubclass(data.dtype.type, np.datetime64):
-                datacopy[mask] = lib.iNaT
+                datacopy[mask] = tslib.iNaT
             else:
                 datacopy = com._maybe_upcast(datacopy)
                 datacopy[mask] = NA
@@ -4306,7 +4309,8 @@ def corr(self, method='pearson', min_periods=None):
         mat = numeric_df.values
 
         if method == 'pearson':
-            correl = lib.nancorr(com._ensure_float64(mat), minp=min_periods)
+            correl = _algos.nancorr(com._ensure_float64(mat),
+                                    minp=min_periods)
         else:
             if min_periods is None:
                 min_periods = 1
@@ -4357,8 +4361,8 @@ def cov(self, min_periods=None):
             else:
                 baseCov = np.cov(mat.T)
         else:
-            baseCov = lib.nancorr(com._ensure_float64(mat), cov=True,
-                                  minp=min_periods)
+            baseCov = _algos.nancorr(com._ensure_float64(mat), cov=True,
+                                     minp=min_periods)
 
         return self._constructor(baseCov, index=cols, columns=cols)
 
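
These frame.py hunks only reroute Cython calls from the old monolithic pandas.lib to the new pandas.tslib and pandas.algos modules; the public DataFrame API is unchanged. A small sketch with illustrative data (corr now reaches pandas.algos.nancorr under the hood, and cov takes the same routine when NaNs are present):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1.0, 2.0, np.nan, 4.0],
                       'b': [4.0, 3.0, 2.0, 1.0]})
    print(df.corr(method='pearson', min_periods=2))   # pairwise Pearson correlations
    print(df.cov(min_periods=2))                      # pairwise covariances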

pandas/core/groupby.py

+1

@@ -17,6 +17,7 @@
 
 import pandas.lib as lib
 import pandas.algos as _algos
+import pandas.hashtable as _hash
 
 _agg_doc = """Aggregate using input function or dict of {column -> function}
 

pandas/core/index.py

+3 -3

@@ -203,7 +203,7 @@ def to_datetime(self, dayfirst=False):
         if self.inferred_type == 'string':
             from dateutil.parser import parse
             parser = lambda x: parse(x, dayfirst=dayfirst)
-            parsed = tslib.try_parse_dates(self.values, parser=parser)
+            parsed = lib.try_parse_dates(self.values, parser=parser)
             return DatetimeIndex(parsed)
         else:
             return DatetimeIndex(self.values)
@@ -727,7 +727,7 @@ def get_value(self, series, key):
                 raise
 
             try:
-                return lib.get_value_box(series, key)
+                return tslib.get_value_box(series, key)
             except IndexError:
                 raise
             except TypeError:
@@ -1526,7 +1526,7 @@ def get_value(self, series, key):
                 pass
 
             try:
-                return lib.get_value_at(series, key)
+                return _index.get_value_at(series, key)
             except IndexError:
                 raise
             except TypeError:

pandas/core/internals.py

+7 -6

@@ -7,13 +7,14 @@
 from pandas.core.index import Index, _ensure_index, _handle_legacy_indexes
 import pandas.core.common as com
 import pandas.lib as lib
+import pandas.tslib as tslib
 
 from pandas.util import py3compat
 
 class Block(object):
     """
-    Canonical n-dimensional unit of homogeneous dtype contained in a pandas data
-    structure
+    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
+    data structure
 
     Index-ignorant; let the container take care of that
     """
@@ -399,7 +400,7 @@ class DatetimeBlock(Block):
 
     def __init__(self, values, items, ref_items, ndim=2):
         if values.dtype != _NS_DTYPE:
-            values = lib.cast_to_nanoseconds(values)
+            values = tslib.cast_to_nanoseconds(values)
 
         Block.__init__(self, values, items, ref_items, ndim=ndim)
 
@@ -429,14 +430,14 @@ def set(self, item, value):
         loc = self.items.get_loc(item)
 
         if value.dtype != _NS_DTYPE:
-            value = lib.cast_to_nanoseconds(value)
+            value = tslib.cast_to_nanoseconds(value)
 
         self.values[loc] = value
 
     def get_values(self, dtype):
         if dtype == object:
             flat_i8 = self.values.ravel().view(np.int64)
-            res = lib.ints_to_pydatetime(flat_i8)
+            res = tslib.ints_to_pydatetime(flat_i8)
             return res.reshape(self.values.shape)
         return self.values
 
@@ -1300,7 +1301,7 @@ def form_blocks(arrays, names, axes):
             complex_items.append((k, v))
         elif issubclass(v.dtype.type, np.datetime64):
             if v.dtype != _NS_DTYPE:
-                v = lib.cast_to_nanoseconds(v)
+                v = tslib.cast_to_nanoseconds(v)
 
             if hasattr(v, 'tz') and v.tz is not None:
                 object_items.append((k, v))

pandas/core/nanops.py

+7 -5

@@ -5,6 +5,8 @@
 from pandas.core.common import isnull, notnull
 import pandas.core.common as com
 import pandas.lib as lib
+import pandas.algos as algos
+import pandas.hashtable as _hash
 
 try:
     import bottleneck as bn
@@ -121,7 +123,7 @@ def get_median(x):
         mask = notnull(x)
         if not skipna and not mask.all():
             return np.nan
-        return lib.median(x[mask])
+        return algos.median(x[mask])
 
     if values.dtype != np.float64:
         values = values.astype('f8')
@@ -494,17 +496,17 @@ def unique1d(values):
     Hash table-based unique
     """
    if np.issubdtype(values.dtype, np.floating):
-        table = lib.Float64HashTable(len(values))
+        table = _hash.Float64HashTable(len(values))
         uniques = np.array(table.unique(com._ensure_float64(values)),
                            dtype=np.float64)
     elif np.issubdtype(values.dtype, np.datetime64):
-        table = lib.Int64HashTable(len(values))
+        table = _hash.Int64HashTable(len(values))
         uniques = table.unique(com._ensure_int64(values))
         uniques = uniques.view('M8[ns]')
     elif np.issubdtype(values.dtype, np.integer):
-        table = lib.Int64HashTable(len(values))
+        table = _hash.Int64HashTable(len(values))
         uniques = table.unique(com._ensure_int64(values))
     else:
-        table = lib.PyObjectHashTable(len(values))
+        table = _hash.PyObjectHashTable(len(values))
         uniques = table.unique(com._ensure_object(values))
     return uniques
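
unique1d now picks a dtype-specific hash table from the new pandas.hashtable module instead of pandas.lib. The same pattern can be used directly, as a sketch (assuming the compiled pandas.hashtable extension; the length passed to the constructor is only a size hint):

    import numpy as np
    import pandas.hashtable as _hash

    values = np.array([3, 1, 3, 2, 1], dtype=np.int64)
    table = _hash.Int64HashTable(len(values))
    print(table.unique(values))   # distinct values in first-seen order: [3 1 2]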

pandas/core/reshape.py

+2 -1

@@ -14,6 +14,7 @@
                                  decons_group_index)
 import pandas.core.common as com
 import pandas.lib as lib
+import pandas.algos as algos
 
 
 from pandas.core.index import MultiIndex
@@ -87,7 +88,7 @@ def _make_sorted_values_labels(self):
         comp_index, obs_ids = _compress_group_index(group_index)
         ngroups = len(obs_ids)
 
-        indexer = lib.groupsort_indexer(comp_index, ngroups)[0]
+        indexer = algos.groupsort_indexer(comp_index, ngroups)[0]
         indexer = _ensure_platform_int(indexer)
 
         self.sorted_values = com.take_2d(self.values, indexer, axis=0)
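
groupsort_indexer, now imported from pandas.algos, returns a row permutation that makes equal group ids contiguous (a counting sort), plus per-group counts. A sketch with made-up group ids (output values are indicative):

    import numpy as np
    import pandas.algos as algos

    group_ids = np.array([1, 0, 1, 2, 0], dtype=np.int64)    # one group id per row
    indexer, counts = algos.groupsort_indexer(group_ids, 3)  # ngroups = 3
    print(indexer)   # row positions grouped together, e.g. [1 4 0 2 3]
    print(counts)    # group sizes (a leading slot is reserved for -1/NA ids)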

pandas/core/series.py

+4 -3

@@ -32,6 +32,7 @@
 from pandas.util.decorators import Appender, Substitution, cache_readonly
 
 import pandas.lib as lib
+import pandas.tslib as tslib
 import pandas.index as _index
 
 from pandas.compat.scipy import scoreatpercentile as _quantile
@@ -808,7 +809,7 @@ def iget_value(self, i):
         value : scalar (int) or Series (slice, sequence)
         """
         try:
-            return lib.get_value_at(self, i)
+            return _index.get_value_at(self, i)
         except IndexError:
             raise
         except:
@@ -819,7 +820,7 @@ def iget_value(self, i):
         if isinstance(label, Index):
             return self.reindex(label)
         else:
-            return lib.get_value_at(self, i)
+            return _index.get_value_at(self, i)
 
     iget = iget_value
     irow = iget_value
@@ -2986,7 +2987,7 @@ def _try_cast(arr):
                 not com.is_datetime64_dtype(dtype)):
             if dtype == object:
                 ints = np.asarray(data).view('i8')
-                subarr = lib.ints_to_pydatetime(ints)
+                subarr = tslib.ints_to_pydatetime(ints)
             elif raise_cast_failure:
                 raise TypeError('Cannot cast datetime64 to %s' % dtype)
             else:
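
Series.iget_value (aliased as iget and irow) is a positional lookup; it now delegates to pandas.index.get_value_at rather than pandas.lib. A quick sketch:

    import pandas as pd

    s = pd.Series([10.0, 20.0, 30.0], index=['a', 'b', 'c'])
    print(s.iget_value(1))   # 20.0, by position rather than by label
    print(s.irow(2))         # 30.0, irow is an alias for iget_value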

pandas/hashtable.pyx

+2 -2

@@ -565,7 +565,7 @@ cdef class Float64HashTable(HashTable):
         labels = self.get_labels(values, uniques, 0, -1)
         return uniques.to_array(), labels
 
-    def get_lables(self, ndarray[float64_t] values,
+    def get_labels(self, ndarray[float64_t] values,
                    Float64Vector uniques,
                    Py_ssize_t count_prior, int64_t na_sentinel):
         cdef:
@@ -779,7 +779,7 @@ cdef class PyObjectHashTable(HashTable):
 
         return result
 
-    def get_lables(self, ndarray[object] values, ObjectVector uniques,
+    def get_labels(self, ndarray[object] values, ObjectVector uniques,
                    Py_ssize_t count_prior, int64_t na_sentinel):
        cdef:
            Py_ssize_t i, n = len(values)
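
Beyond fixing the misspelled get_lables, get_labels is the factorize-style primitive: it appends first-seen values to a uniques vector and returns an integer label per input element. A sketch against the Float64 table (assuming the vector classes are exposed by the compiled module, as they appear to be here):

    import numpy as np
    import pandas.hashtable as _hash

    values = np.array([1.5, 2.5, 1.5, 4.0], dtype=np.float64)
    table = _hash.Float64HashTable(len(values))
    uniques = _hash.Float64Vector()
    labels = table.get_labels(values, uniques, 0, -1)   # count_prior=0, na_sentinel=-1
    print(labels)               # [0 1 0 2]
    print(uniques.to_array())   # [1.5 2.5 4.0]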

pandas/io/pytables.py

+17 -13

@@ -29,6 +29,9 @@
 from pandas.tools.merge import concat
 
 import pandas.lib as lib
+import pandas.algos as algos
+import pandas.tslib as tslib
+
 from contextlib import contextmanager
 
 # reading and writing the full object in one go
@@ -609,9 +612,9 @@ def _write_index(self, group, key, index):
             node._v_attrs.freq = index.freq
 
         if hasattr(index, 'tz') and index.tz is not None:
-            zone = lib.get_timezone(index.tz)
+            zone = tslib.get_timezone(index.tz)
             if zone is None:
-                zone = lib.tot_seconds(index.tz.utcoffset())
+                zone = tslib.tot_seconds(index.tz.utcoffset())
             node._v_attrs.tz = zone
 
     def _read_index(self, group, key):
@@ -1276,26 +1279,27 @@ def delete(self, where = None, **kwargs):
         raise NotImplementedError("cannot delete on an abstract table")
 
 class WORMTable(Table):
-    """ a write-once read-many table:
-        this format DOES NOT ALLOW appending to a table. writing is a one-time operation
-        the data are stored in a format that allows for searching the data on disk
+    """ a write-once read-many table: this format DOES NOT ALLOW appending to a
+        table. writing is a one-time operation the data are stored in a format
+        that allows for searching the data on disk
        """
    table_type = 'worm'
 
     def read(self, **kwargs):
-        """ read the indicies and the indexing array, calculate offset rows and return """
+        """ read the indicies and the indexing array, calculate offset rows and
+        return """
         raise NotImplementedError("WORMTable needs to implement read")
 
     def write(self, **kwargs):
-        """ write in a format that we can search later on (but cannot append to):
-            write out the indicies and the values using _write_array (e.g. a CArray)
-            create an indexing table so that we can search """
+        """ write in a format that we can search later on (but cannot append
+        to): write out the indicies and the values using _write_array
+        (e.g. a CArray) create an indexing table so that we can search"""
         raise NotImplementedError("WORKTable needs to implement write")
 
 class LegacyTable(Table):
-    """ an appendable table:
-        allow append/query/delete operations to a (possibily) already existing appendable table
-        this table ALLOWS append (but doesn't require them), and stores the data in a format
+    """ an appendable table: allow append/query/delete operations to a
+        (possibily) already existing appendable table this table ALLOWS
+        append (but doesn't require them), and stores the data in a format
         that can be easily searched
 
        """
@@ -1318,7 +1322,7 @@ def read(self, where=None):
 
         panels = []
         if len(unique(key)) == len(key):
-            sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
+            sorter, _ = algos.groupsort_indexer(com._ensure_int64(key), J * K)
             sorter = com._ensure_platform_int(sorter)
 
             # create the panels

pandas/sparse/array.py

+2 -1

@@ -15,6 +15,7 @@
 from pandas._sparse import BlockIndex, IntIndex
 import pandas._sparse as splib
 import pandas.lib as lib
+import pandas.index as _index
 
 
 def _sparse_op_wrap(op, name):
@@ -264,7 +265,7 @@ def _get_val_at(self, loc):
         if sp_loc == -1:
             return self.fill_value
         else:
-            return lib.get_value_at(self, sp_loc)
+            return _index.get_value_at(self, sp_loc)
 
     def take(self, indices, axis=0):
         """

pandas/src/inference.pyx

-14

@@ -294,20 +294,6 @@ def is_period_array(ndarray[object] values):
             return False
     return True
 
-def extract_ordinals(ndarray[object] values, freq):
-    cdef:
-        Py_ssize_t i, n = len(values)
-        ndarray[int64_t] ordinals = np.empty(n, dtype=np.int64)
-        object p
-
-    for i in range(n):
-        p = values[i]
-        ordinals[i] = p.ordinal
-        if p.freq != freq:
-            raise ValueError("%s is wrong freq" % p)
-
-    return ordinals
-
 
 cdef extern from "parse_helper.h":
     inline int floatify(object, double *result) except -1
