
Commit 5c78ecb

TST: test suite passes!
1 parent 45e478a commit 5c78ecb

21 files changed, +156 -98 lines changed

pandas/algos.pyx

+35

@@ -1708,6 +1708,41 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
 #----------------------------------------------------------------------
 # group operations
 
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_lexsorted(list list_of_arrays):
+    cdef:
+        int i
+        Py_ssize_t n, nlevels
+        int64_t k, cur, pre
+        ndarray arr
+
+    nlevels = len(list_of_arrays)
+    n = len(list_of_arrays[0])
+
+    cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*))
+    for i from 0 <= i < nlevels:
+        # vecs[i] = <int64_t *> (<ndarray> list_of_arrays[i]).data
+
+        arr = list_of_arrays[i]
+        vecs[i] = <int64_t *> arr.data
+    # assume uniqueness??
+
+    for i from 1 <= i < n:
+        for k from 0 <= k < nlevels:
+            cur = vecs[k][i]
+            pre = vecs[k][i-1]
+            if cur == pre:
+                continue
+            elif cur > pre:
+                break
+            else:
+                return False
+    free(vecs)
+    return True
+
+
 @cython.boundscheck(False)
 def groupby_indices(ndarray values):
     cdef:
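
The new is_lexsorted checks whether a list of parallel int64 label arrays is sorted lexicographically, comparing each row against the previous one level by level. A minimal usage sketch (assuming the compiled pandas.algos extension from this commit; the arrays are illustrative):

    import numpy as np
    import pandas.algos as _algos

    # Rows read across the levels: (1, 1), (1, 2), (2, 0) -- lexicographically sorted.
    level0 = np.array([1, 1, 2], dtype=np.int64)
    level1 = np.array([1, 2, 0], dtype=np.int64)
    print(_algos.is_lexsorted([level0, level1]))                               # True

    # Rows (1, 2), (1, 1), (2, 0): the second row sorts before the first, so False.
    print(_algos.is_lexsorted([level0, np.array([2, 1, 0], dtype=np.int64)]))  # False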

pandas/core/frame.py

+8 -4

@@ -42,7 +42,10 @@
 import pandas.core.format as fmt
 import pandas.core.generic as generic
 import pandas.core.nanops as nanops
+
 import pandas.lib as lib
+import pandas.tslib as tslib
+import pandas.algos as _algos
 
 from pandas.core.config import get_option
 
@@ -380,7 +383,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             mask = ma.getmaskarray(data)
             datacopy = ma.copy(data)
             if issubclass(data.dtype.type, np.datetime64):
-                datacopy[mask] = lib.iNaT
+                datacopy[mask] = tslib.iNaT
             else:
                 datacopy = com._maybe_upcast(datacopy)
                 datacopy[mask] = NA
@@ -4306,7 +4309,8 @@ def corr(self, method='pearson', min_periods=None):
         mat = numeric_df.values
 
         if method == 'pearson':
-            correl = lib.nancorr(com._ensure_float64(mat), minp=min_periods)
+            correl = _algos.nancorr(com._ensure_float64(mat),
+                                    minp=min_periods)
         else:
             if min_periods is None:
                 min_periods = 1
@@ -4357,8 +4361,8 @@ def cov(self, min_periods=None):
             else:
                 baseCov = np.cov(mat.T)
         else:
-            baseCov = lib.nancorr(com._ensure_float64(mat), cov=True,
-                                  minp=min_periods)
+            baseCov = _algos.nancorr(com._ensure_float64(mat), cov=True,
+                                     minp=min_periods)
 
         return self._constructor(baseCov, index=cols, columns=cols)
 
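
These frame.py hunks only reroute Cython calls from the old monolithic pandas.lib to the new pandas.tslib and pandas.algos modules; the public DataFrame API is unchanged. A small sketch with illustrative data (corr now reaches pandas.algos.nancorr under the hood, and cov takes the same routine when NaNs are present):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1.0, 2.0, np.nan, 4.0],
                       'b': [4.0, 3.0, 2.0, 1.0]})
    print(df.corr(method='pearson', min_periods=2))   # pairwise Pearson correlations
    print(df.cov(min_periods=2))                      # pairwise covariances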

pandas/core/groupby.py

+1

@@ -17,6 +17,7 @@
 
 import pandas.lib as lib
 import pandas.algos as _algos
+import pandas.hashtable as _hash
 
 _agg_doc = """Aggregate using input function or dict of {column -> function}
 

pandas/core/index.py

+3 -3

@@ -203,7 +203,7 @@ def to_datetime(self, dayfirst=False):
         if self.inferred_type == 'string':
             from dateutil.parser import parse
             parser = lambda x: parse(x, dayfirst=dayfirst)
-            parsed = tslib.try_parse_dates(self.values, parser=parser)
+            parsed = lib.try_parse_dates(self.values, parser=parser)
             return DatetimeIndex(parsed)
         else:
             return DatetimeIndex(self.values)
@@ -727,7 +727,7 @@ def get_value(self, series, key):
                 raise
 
             try:
-                return lib.get_value_box(series, key)
+                return tslib.get_value_box(series, key)
             except IndexError:
                 raise
             except TypeError:
@@ -1526,7 +1526,7 @@ def get_value(self, series, key):
                 pass
 
             try:
-                return lib.get_value_at(series, key)
+                return _index.get_value_at(series, key)
             except IndexError:
                 raise
             except TypeError:

pandas/core/internals.py

+7 -6

@@ -7,13 +7,14 @@
 from pandas.core.index import Index, _ensure_index, _handle_legacy_indexes
 import pandas.core.common as com
 import pandas.lib as lib
+import pandas.tslib as tslib
 
 from pandas.util import py3compat
 
 class Block(object):
     """
-    Canonical n-dimensional unit of homogeneous dtype contained in a pandas data
-    structure
+    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
+    data structure
 
     Index-ignorant; let the container take care of that
     """
@@ -399,7 +400,7 @@ class DatetimeBlock(Block):
 
     def __init__(self, values, items, ref_items, ndim=2):
         if values.dtype != _NS_DTYPE:
-            values = lib.cast_to_nanoseconds(values)
+            values = tslib.cast_to_nanoseconds(values)
 
         Block.__init__(self, values, items, ref_items, ndim=ndim)
 
@@ -429,14 +430,14 @@ def set(self, item, value):
         loc = self.items.get_loc(item)
 
         if value.dtype != _NS_DTYPE:
-            value = lib.cast_to_nanoseconds(value)
+            value = tslib.cast_to_nanoseconds(value)
 
         self.values[loc] = value
 
     def get_values(self, dtype):
         if dtype == object:
             flat_i8 = self.values.ravel().view(np.int64)
-            res = lib.ints_to_pydatetime(flat_i8)
+            res = tslib.ints_to_pydatetime(flat_i8)
             return res.reshape(self.values.shape)
         return self.values
 
@@ -1300,7 +1301,7 @@ def form_blocks(arrays, names, axes):
             complex_items.append((k, v))
         elif issubclass(v.dtype.type, np.datetime64):
             if v.dtype != _NS_DTYPE:
-                v = lib.cast_to_nanoseconds(v)
+                v = tslib.cast_to_nanoseconds(v)
 
             if hasattr(v, 'tz') and v.tz is not None:
                 object_items.append((k, v))

pandas/core/nanops.py

+7 -5

@@ -5,6 +5,8 @@
 from pandas.core.common import isnull, notnull
 import pandas.core.common as com
 import pandas.lib as lib
+import pandas.algos as algos
+import pandas.hashtable as _hash
 
 try:
     import bottleneck as bn
@@ -121,7 +123,7 @@ def get_median(x):
         mask = notnull(x)
         if not skipna and not mask.all():
             return np.nan
-        return lib.median(x[mask])
+        return algos.median(x[mask])
 
     if values.dtype != np.float64:
         values = values.astype('f8')
@@ -494,17 +496,17 @@ def unique1d(values):
     Hash table-based unique
     """
    if np.issubdtype(values.dtype, np.floating):
-        table = lib.Float64HashTable(len(values))
+        table = _hash.Float64HashTable(len(values))
         uniques = np.array(table.unique(com._ensure_float64(values)),
                            dtype=np.float64)
     elif np.issubdtype(values.dtype, np.datetime64):
-        table = lib.Int64HashTable(len(values))
+        table = _hash.Int64HashTable(len(values))
         uniques = table.unique(com._ensure_int64(values))
         uniques = uniques.view('M8[ns]')
     elif np.issubdtype(values.dtype, np.integer):
-        table = lib.Int64HashTable(len(values))
+        table = _hash.Int64HashTable(len(values))
         uniques = table.unique(com._ensure_int64(values))
     else:
-        table = lib.PyObjectHashTable(len(values))
+        table = _hash.PyObjectHashTable(len(values))
         uniques = table.unique(com._ensure_object(values))
     return uniques
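
unique1d now picks a dtype-specific hash table from the new pandas.hashtable module instead of pandas.lib. The same pattern can be used directly, as a sketch (assuming the compiled pandas.hashtable extension; the length passed to the constructor is only a size hint):

    import numpy as np
    import pandas.hashtable as _hash

    values = np.array([3, 1, 3, 2, 1], dtype=np.int64)
    table = _hash.Int64HashTable(len(values))
    print(table.unique(values))   # distinct values in first-seen order: [3 1 2]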

pandas/core/reshape.py

+2 -1

@@ -14,6 +14,7 @@
                                  decons_group_index)
 import pandas.core.common as com
 import pandas.lib as lib
+import pandas.algos as algos
 
 
 from pandas.core.index import MultiIndex
@@ -87,7 +88,7 @@ def _make_sorted_values_labels(self):
         comp_index, obs_ids = _compress_group_index(group_index)
         ngroups = len(obs_ids)
 
-        indexer = lib.groupsort_indexer(comp_index, ngroups)[0]
+        indexer = algos.groupsort_indexer(comp_index, ngroups)[0]
         indexer = _ensure_platform_int(indexer)
 
         self.sorted_values = com.take_2d(self.values, indexer, axis=0)
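
groupsort_indexer, now imported from pandas.algos, returns a row permutation that makes equal group ids contiguous (a counting sort), plus per-group counts. A sketch with made-up group ids (output values are indicative):

    import numpy as np
    import pandas.algos as algos

    group_ids = np.array([1, 0, 1, 2, 0], dtype=np.int64)    # one group id per row
    indexer, counts = algos.groupsort_indexer(group_ids, 3)  # ngroups = 3
    print(indexer)   # row positions grouped together, e.g. [1 4 0 2 3]
    print(counts)    # group sizes (a leading slot is reserved for -1/NA ids)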

pandas/core/series.py

+4 -3

@@ -32,6 +32,7 @@
 from pandas.util.decorators import Appender, Substitution, cache_readonly
 
 import pandas.lib as lib
+import pandas.tslib as tslib
 import pandas.index as _index
 
 from pandas.compat.scipy import scoreatpercentile as _quantile
@@ -808,7 +809,7 @@ def iget_value(self, i):
         value : scalar (int) or Series (slice, sequence)
         """
         try:
-            return lib.get_value_at(self, i)
+            return _index.get_value_at(self, i)
         except IndexError:
             raise
         except:
@@ -819,7 +820,7 @@ def iget_value(self, i):
         if isinstance(label, Index):
             return self.reindex(label)
         else:
-            return lib.get_value_at(self, i)
+            return _index.get_value_at(self, i)
 
     iget = iget_value
     irow = iget_value
@@ -2986,7 +2987,7 @@ def _try_cast(arr):
                 not com.is_datetime64_dtype(dtype)):
             if dtype == object:
                 ints = np.asarray(data).view('i8')
-                subarr = lib.ints_to_pydatetime(ints)
+                subarr = tslib.ints_to_pydatetime(ints)
             elif raise_cast_failure:
                 raise TypeError('Cannot cast datetime64 to %s' % dtype)
             else:
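
Series.iget_value (aliased as iget and irow) is a positional lookup; it now delegates to pandas.index.get_value_at rather than pandas.lib. A quick sketch:

    import pandas as pd

    s = pd.Series([10.0, 20.0, 30.0], index=['a', 'b', 'c'])
    print(s.iget_value(1))   # 20.0, by position rather than by label
    print(s.irow(2))         # 30.0, irow is an alias for iget_value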

pandas/hashtable.pyx

+2 -2

@@ -565,7 +565,7 @@ cdef class Float64HashTable(HashTable):
         labels = self.get_labels(values, uniques, 0, -1)
         return uniques.to_array(), labels
 
-    def get_lables(self, ndarray[float64_t] values,
+    def get_labels(self, ndarray[float64_t] values,
                    Float64Vector uniques,
                    Py_ssize_t count_prior, int64_t na_sentinel):
         cdef:
@@ -779,7 +779,7 @@ cdef class PyObjectHashTable(HashTable):
 
         return result
 
-    def get_lables(self, ndarray[object] values, ObjectVector uniques,
+    def get_labels(self, ndarray[object] values, ObjectVector uniques,
                    Py_ssize_t count_prior, int64_t na_sentinel):
        cdef:
            Py_ssize_t i, n = len(values)
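
Beyond fixing the misspelled get_lables, get_labels is the factorize-style primitive: it appends first-seen values to a uniques vector and returns an integer label per input element. A sketch against the Float64 table (assuming the vector classes are exposed by the compiled module, as they appear to be here):

    import numpy as np
    import pandas.hashtable as _hash

    values = np.array([1.5, 2.5, 1.5, 4.0], dtype=np.float64)
    table = _hash.Float64HashTable(len(values))
    uniques = _hash.Float64Vector()
    labels = table.get_labels(values, uniques, 0, -1)   # count_prior=0, na_sentinel=-1
    print(labels)               # [0 1 0 2]
    print(uniques.to_array())   # [1.5 2.5 4.0]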

pandas/io/pytables.py

+17 -13

@@ -29,6 +29,9 @@
 from pandas.tools.merge import concat
 
 import pandas.lib as lib
+import pandas.algos as algos
+import pandas.tslib as tslib
+
 from contextlib import contextmanager
 
 # reading and writing the full object in one go
@@ -609,9 +612,9 @@ def _write_index(self, group, key, index):
             node._v_attrs.freq = index.freq
 
         if hasattr(index, 'tz') and index.tz is not None:
-            zone = lib.get_timezone(index.tz)
+            zone = tslib.get_timezone(index.tz)
             if zone is None:
-                zone = lib.tot_seconds(index.tz.utcoffset())
+                zone = tslib.tot_seconds(index.tz.utcoffset())
             node._v_attrs.tz = zone
 
     def _read_index(self, group, key):
@@ -1276,26 +1279,27 @@ def delete(self, where = None, **kwargs):
         raise NotImplementedError("cannot delete on an abstract table")
 
 class WORMTable(Table):
-    """ a write-once read-many table:
-        this format DOES NOT ALLOW appending to a table. writing is a one-time operation
-        the data are stored in a format that allows for searching the data on disk
+    """ a write-once read-many table: this format DOES NOT ALLOW appending to a
+        table. writing is a one-time operation the data are stored in a format
+        that allows for searching the data on disk
        """
    table_type = 'worm'
 
     def read(self, **kwargs):
-        """ read the indicies and the indexing array, calculate offset rows and return """
+        """ read the indicies and the indexing array, calculate offset rows and
+        return """
         raise NotImplementedError("WORMTable needs to implement read")
 
     def write(self, **kwargs):
-        """ write in a format that we can search later on (but cannot append to):
-            write out the indicies and the values using _write_array (e.g. a CArray)
-            create an indexing table so that we can search """
+        """ write in a format that we can search later on (but cannot append
+        to): write out the indicies and the values using _write_array
+        (e.g. a CArray) create an indexing table so that we can search"""
         raise NotImplementedError("WORKTable needs to implement write")
 
 class LegacyTable(Table):
-    """ an appendable table:
-        allow append/query/delete operations to a (possibily) already existing appendable table
-        this table ALLOWS append (but doesn't require them), and stores the data in a format
+    """ an appendable table: allow append/query/delete operations to a
+        (possibily) already existing appendable table this table ALLOWS
+        append (but doesn't require them), and stores the data in a format
         that can be easily searched
 
        """
@@ -1318,7 +1322,7 @@ def read(self, where=None):
 
         panels = []
         if len(unique(key)) == len(key):
-            sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
+            sorter, _ = algos.groupsort_indexer(com._ensure_int64(key), J * K)
             sorter = com._ensure_platform_int(sorter)
 
             # create the panels

pandas/sparse/array.py

+2 -1

@@ -15,6 +15,7 @@
 from pandas._sparse import BlockIndex, IntIndex
 import pandas._sparse as splib
 import pandas.lib as lib
+import pandas.index as _index
 
 
 def _sparse_op_wrap(op, name):
@@ -264,7 +265,7 @@ def _get_val_at(self, loc):
         if sp_loc == -1:
             return self.fill_value
         else:
-            return lib.get_value_at(self, sp_loc)
+            return _index.get_value_at(self, sp_loc)
 
     def take(self, indices, axis=0):
         """

pandas/src/inference.pyx

-14

@@ -294,20 +294,6 @@ def is_period_array(ndarray[object] values):
             return False
     return True
 
-def extract_ordinals(ndarray[object] values, freq):
-    cdef:
-        Py_ssize_t i, n = len(values)
-        ndarray[int64_t] ordinals = np.empty(n, dtype=np.int64)
-        object p
-
-    for i in range(n):
-        p = values[i]
-        ordinals[i] = p.ordinal
-        if p.freq != freq:
-            raise ValueError("%s is wrong freq" % p)
-
-    return ordinals
-
 
 cdef extern from "parse_helper.h":
     inline int floatify(object, double *result) except -1
