diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c3a654b01022c..e1d59f807a7fd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -21,14 +21,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyBytes_Check, PyUnicode_Check, PyTuple_New, - PyObject_RichCompareBool, - PyBytes_GET_SIZE, - PyUnicode_GET_SIZE) - -try: - from cpython cimport PyString_GET_SIZE -except ImportError: - from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE + PyObject_RichCompareBool) cimport cpython @@ -38,7 +31,7 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT) PyDateTime_IMPORT -from tslib import NaT, Timestamp, Timedelta, array_to_datetime +from tslib import NaT, array_to_datetime from missing cimport checknull @@ -127,28 +120,6 @@ def item_from_zerodim(object val): return util.unbox_if_zerodim(val) -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_unique(ndarray[object] values): - cdef: - Py_ssize_t i, n = len(values) - list uniques = [] - dict table = {} - object val, stub = 0 - - for i from 0 <= i < n: - val = values[i] - if val not in table: - table[val] = stub - uniques.append(val) - try: - uniques.sort() - except Exception: - pass - - return uniques - - @cython.wraparound(False) @cython.boundscheck(False) def fast_unique_multiple(list arrays): @@ -368,30 +339,6 @@ def has_infs_f8(ndarray[float64_t] arr): return False -def convert_timestamps(ndarray values): - cdef: - object val, f, result - dict cache = {} - Py_ssize_t i, n = len(values) - ndarray[object] out - - # for HDFStore, a bit temporary but... - - from datetime import datetime - f = datetime.fromtimestamp - - out = np.empty(n, dtype='O') - - for i in range(n): - val = util.get_value_1d(values, i) - if val in cache: - out[i] = cache[val] - else: - cache[val] = out[i] = f(val) - - return out - - def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) @@ -731,145 +678,6 @@ def clean_index_list(list obj): return np.asarray(obj), 0 -ctypedef fused pandas_string: - str - unicode - bytes - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): - """ return the maximum size of elements in a 1-dim string array """ - cdef: - Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] - pandas_string v - - for i in range(length): - v = arr[i] - if PyString_Check(v): - l = PyString_GET_SIZE(v) - elif PyBytes_Check(v): - l = PyBytes_GET_SIZE(v) - elif PyUnicode_Check(v): - l = PyUnicode_GET_SIZE(v) - - if l > m: - m = l - - return m - - -@cython.boundscheck(False) -@cython.wraparound(False) -def string_array_replace_from_nan_rep( - ndarray[object, ndim=1] arr, object nan_rep, - object replace=None): - """ - Replace the values in the array with 'replacement' if - they are 'nan_rep'. Return the same array. - """ - - cdef int length = arr.shape[0], i = 0 - if replace is None: - replace = np.nan - - for i from 0 <= i < length: - if arr[i] == nan_rep: - arr[i] = replace - - return arr - - -@cython.boundscheck(False) -@cython.wraparound(False) -def convert_json_to_lines(object arr): - """ - replace comma separated json with line feeds, paying special attention - to quotes & brackets - """ - cdef: - Py_ssize_t i = 0, num_open_brackets_seen = 0, length - bint in_quotes = 0, is_escaping = 0 - ndarray[uint8_t] narr - unsigned char v, comma, left_bracket, right_brack, newline - - newline = ord('\n') - comma = ord(',') - left_bracket = ord('{') - right_bracket = ord('}') - quote = ord('"') - backslash = ord('\\') - - narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() - length = narr.shape[0] - for i in range(length): - v = narr[i] - if v == quote and i > 0 and not is_escaping: - in_quotes = ~in_quotes - if v == backslash or is_escaping: - is_escaping = ~is_escaping - if v == comma: # commas that should be \n - if num_open_brackets_seen == 0 and not in_quotes: - narr[i] = newline - elif v == left_bracket: - if not in_quotes: - num_open_brackets_seen += 1 - elif v == right_bracket: - if not in_quotes: - num_open_brackets_seen -= 1 - - return narr.tostring().decode('utf-8') - - -@cython.boundscheck(False) -@cython.wraparound(False) -def write_csv_rows(list data, ndarray data_index, - int nlevels, ndarray cols, object writer): - - cdef int N, j, i, ncols - cdef list rows - cdef object val - - # In crude testing, N>100 yields little marginal improvement - N=100 - - # pre-allocate rows - ncols = len(cols) - rows = [[None] * (nlevels + ncols) for x in range(N)] - - j = -1 - if nlevels == 1: - for j in range(len(data_index)): - row = rows[j % N] - row[0] = data_index[j] - for i in range(ncols): - row[1 + i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - elif nlevels > 1: - for j in range(len(data_index)): - row = rows[j % N] - row[:nlevels] = list(data_index[j]) - for i in range(ncols): - row[nlevels + i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - else: - for j in range(len(data_index)): - row = rows[j % N] - for i in range(ncols): - row[i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - - if j >= 0 and (j < N - 1 or (j % N) != N - 1): - writer.writerows(rows[:((j + 1) % N)]) - - # ------------------------------------------------------------------------------ # Groupby-related functions diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index efe61716d0831..89d2de6de213a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -2225,3 +2225,37 @@ def _maybe_encode(values): if values is None: return [] return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values] + + +def sanitize_objects(ndarray[object] values, set na_values, + convert_empty=True): + """ + Convert specified values, including the given set na_values and empty + strings if convert_empty is True, to np.nan. + + Parameters + ---------- + values : ndarray[object] + na_values : set + convert_empty : bool (default True) + """ + cdef: + Py_ssize_t i, n + object val, onan + Py_ssize_t na_count = 0 + dict memo = {} + + n = len(values) + onan = np.nan + + for i from 0 <= i < n: + val = values[i] + if (convert_empty and val == '') or (val in na_values): + values[i] = onan + na_count += 1 + elif val in memo: + values[i] = memo[val] + else: + memo[val] = val + + return na_count diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index b29a2e519efcd..75bff34e4a391 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -6,7 +6,7 @@ from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare -from datetime import datetime, timedelta + iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 @@ -1405,30 +1405,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects -def sanitize_objects(ndarray[object] values, set na_values, - convert_empty=True): - cdef: - Py_ssize_t i, n - object val, onan - Py_ssize_t na_count = 0 - dict memo = {} - - n = len(values) - onan = np.nan - - for i from 0 <= i < n: - val = values[i] - if (convert_empty and val == '') or (val in na_values): - values[i] = onan - na_count += 1 - elif val in memo: - values[i] = memo[val] - else: - memo[val] = val - - return na_count - - def maybe_convert_bool(ndarray[object] arr, true_values=None, false_values=None): cdef: diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx new file mode 100644 index 0000000000000..6f07d04b3fad3 --- /dev/null +++ b/pandas/_libs/writers.pyx @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- + +cimport cython +from cython cimport Py_ssize_t + +from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check, + PyBytes_GET_SIZE, PyUnicode_GET_SIZE) + +try: + from cpython cimport PyString_GET_SIZE +except ImportError: + from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, uint8_t +cnp.import_array() + +cimport util + + +ctypedef fused pandas_string: + str + unicode + bytes + + +@cython.boundscheck(False) +@cython.wraparound(False) +def write_csv_rows(list data, ndarray data_index, + int nlevels, ndarray cols, object writer): + """ + Write the given data to the writer object, pre-allocating where possible + for performance improvements. + + Parameters + ---------- + data : list + data_index : ndarray + nlevels : int + cols : ndarray + writer : object + """ + cdef int N, j, i, ncols + cdef list rows + cdef object val + + # In crude testing, N>100 yields little marginal improvement + N = 100 + + # pre-allocate rows + ncols = len(cols) + rows = [[None] * (nlevels + ncols) for x in range(N)] + + j = -1 + if nlevels == 1: + for j in range(len(data_index)): + row = rows[j % N] + row[0] = data_index[j] + for i in range(ncols): + row[1 + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + elif nlevels > 1: + for j in range(len(data_index)): + row = rows[j % N] + row[:nlevels] = list(data_index[j]) + for i in range(ncols): + row[nlevels + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + else: + for j in range(len(data_index)): + row = rows[j % N] + for i in range(ncols): + row[i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + + if j >= 0 and (j < N - 1 or (j % N) != N - 1): + writer.writerows(rows[:((j + 1) % N)]) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def convert_json_to_lines(object arr): + """ + replace comma separated json with line feeds, paying special attention + to quotes & brackets + """ + cdef: + Py_ssize_t i = 0, num_open_brackets_seen = 0, length + bint in_quotes = 0, is_escaping = 0 + ndarray[uint8_t] narr + unsigned char v, comma, left_bracket, right_brack, newline + + newline = ord('\n') + comma = ord(',') + left_bracket = ord('{') + right_bracket = ord('}') + quote = ord('"') + backslash = ord('\\') + + narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + length = narr.shape[0] + for i in range(length): + v = narr[i] + if v == quote and i > 0 and not is_escaping: + in_quotes = ~in_quotes + if v == backslash or is_escaping: + is_escaping = ~is_escaping + if v == comma: # commas that should be \n + if num_open_brackets_seen == 0 and not in_quotes: + narr[i] = newline + elif v == left_bracket: + if not in_quotes: + num_open_brackets_seen += 1 + elif v == right_bracket: + if not in_quotes: + num_open_brackets_seen -= 1 + + return narr.tostring().decode('utf-8') + + +# stata, pytables +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): + """ return the maximum size of elements in a 1-dim string array """ + cdef: + Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] + pandas_string v + + for i in range(length): + v = arr[i] + if PyString_Check(v): + l = PyString_GET_SIZE(v) + elif PyBytes_Check(v): + l = PyBytes_GET_SIZE(v) + elif PyUnicode_Check(v): + l = PyUnicode_GET_SIZE(v) + + if l > m: + m = l + + return m + + +# ------------------------------------------------------------------ +# PyTables Helpers + + +@cython.boundscheck(False) +@cython.wraparound(False) +def string_array_replace_from_nan_rep( + ndarray[object, ndim=1] arr, object nan_rep, + object replace=None): + """ + Replace the values in the array with 'replacement' if + they are 'nan_rep'. Return the same array. + """ + + cdef int length = arr.shape[0], i = 0 + if replace is None: + replace = np.nan + + for i from 0 <= i < length: + if arr[i] == nan_rep: + arr[i] = replace + + return arr diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 6a298f5137eb1..c3128be0f5599 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -48,7 +48,7 @@ def _raw_hex_id(obj): _DEFAULT_GLOBALS = { - 'Timestamp': pandas._libs.lib.Timestamp, + 'Timestamp': pandas._libs.tslib.Timestamp, 'datetime': datetime.datetime, 'True': True, 'False': False, diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5155662d2f97d..b2816343fc8eb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -282,7 +282,7 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = iNaT elif issubclass(dtype.type, np.timedelta64): try: - fill_value = lib.Timedelta(fill_value).value + fill_value = tslib.Timedelta(fill_value).value except Exception: # as for datetimes, cannot upcast to object fill_value = iNaT diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6e777281b11e1..aaa4ae4773108 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from pandas._libs import tslib, lib, properties +from pandas._libs import tslib, properties from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -7216,9 +7216,9 @@ def describe_categorical_1d(data): if is_datetime64_dtype(data): asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] - result += [lib.Timestamp(top), freq, - lib.Timestamp(asint.min()), - lib.Timestamp(asint.max())] + result += [tslib.Timestamp(top), freq, + tslib.Timestamp(asint.min()), + tslib.Timestamp(asint.max())] else: names += ['top', 'freq'] result += [top, freq] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f3e5e4c99a899..22d38d3df071e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2656,7 +2656,7 @@ def _try_coerce_args(self, values, other): other = other.asi8 other_mask = isna(other) elif isinstance(other, (np.datetime64, datetime, date)): - other = lib.Timestamp(other) + other = tslib.Timestamp(other) tz = getattr(other, 'tz', None) # test we can have an equal time zone @@ -2675,7 +2675,7 @@ def _try_coerce_result(self, result): if result.dtype.kind in ['i', 'f', 'O']: result = result.astype('M8[ns]') elif isinstance(result, (np.integer, np.float, np.datetime64)): - result = lib.Timestamp(result, tz=self.values.tz) + result = tslib.Timestamp(result, tz=self.values.tz) if isinstance(result, np.ndarray): # allow passing of > 1dim if its trivial if result.ndim > 1: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index eda86f12d501d..d4851f579dda4 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -268,7 +268,7 @@ def _wrap_results(result, dtype): if is_datetime64_dtype(dtype): if not isinstance(result, np.ndarray): - result = lib.Timestamp(result) + result = tslib.Timestamp(result) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -278,7 +278,7 @@ def _wrap_results(result, dtype): if np.fabs(result) > _int64_max: raise ValueError("overflow in timedelta operation") - result = lib.Timedelta(result, unit='ns') + result = tslib.Timedelta(result, unit='ns') else: result = result.astype('i8').view(dtype) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 706bec9e44892..961c8c004e9e3 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,7 +24,7 @@ from pandas.compat.numpy import function as nv from pandas._libs import lib, tslib -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.util._decorators import Appender, Substitution diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index bca0b64cb53fe..269c81b380b5e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -38,7 +38,7 @@ _stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from pandas.io.formats.common import get_level_lengths -from pandas._libs import lib +from pandas._libs import lib, writers as libwriters from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, format_array_from_datetime) from pandas.core.indexes.datetimes import DatetimeIndex @@ -1789,7 +1789,8 @@ def _save_chunk(self, start_i, end_i): date_format=self.date_format, quoting=self.quoting) - lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer) + libwriters.write_csv_rows(self.data, ix, self.nlevels, + self.cols, self.writer) # ---------------------------------------------------------------------- diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 595031b04e367..c7901f4352d00 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -5,7 +5,7 @@ from collections import defaultdict import numpy as np -from pandas._libs.lib import convert_json_to_lines +from pandas._libs.writers import convert_json_to_lines from pandas import compat, DataFrame diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5135bb01fb378..af1441f4a0fc9 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1596,11 +1596,12 @@ def _infer_types(self, values, na_values, try_num_bool=True): except Exception: result = values if values.dtype == np.object_: - na_count = lib.sanitize_objects(result, na_values, False) + na_count = parsers.sanitize_objects(result, na_values, + False) else: result = values if values.dtype == np.object_: - na_count = lib.sanitize_objects(values, na_values, False) + na_count = parsers.sanitize_objects(values, na_values, False) if result.dtype == np.object_ and try_num_bool: result = lib.maybe_convert_bool(values, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5376473f83f22..0d833807602e1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -47,7 +47,7 @@ from pandas.core.config import get_option from pandas.core.computation.pytables import Expr, maybe_expression -from pandas._libs import algos, lib +from pandas._libs import algos, lib, writers as libwriters from pandas._libs.tslibs import timezones from distutils.version import LooseVersion @@ -3843,7 +3843,7 @@ def read(self, where=None, columns=None, **kwargs): # need a better algorithm tuple_index = long_index.values - unique_tuples = lib.fast_unique(tuple_index) + unique_tuples = unique(tuple_index) unique_tuples = com._asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) @@ -4561,7 +4561,8 @@ def _convert_string_array(data, encoding, itemsize=None): # create the sized dtype if itemsize is None: - itemsize = lib.max_len_string_array(_ensure_object(data.ravel())) + ensured = _ensure_object(data.ravel()) + itemsize = libwriters.max_len_string_array(ensured) data = np.asarray(data, dtype="S%d" % itemsize) return data @@ -4590,7 +4591,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): encoding = _ensure_encoding(encoding) if encoding is not None and len(data): - itemsize = lib.max_len_string_array(_ensure_object(data)) + itemsize = libwriters.max_len_string_array(_ensure_object(data)) if compat.PY3: dtype = "U{0}".format(itemsize) else: @@ -4604,7 +4605,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): if nan_rep is None: nan_rep = 'nan' - data = lib.string_array_replace_from_nan_rep(data, nan_rep) + data = libwriters.string_array_replace_from_nan_rep(data, nan_rep) return data.reshape(shape) @@ -4621,7 +4622,7 @@ def _get_converter(kind, encoding): if kind == 'datetime64': return lambda x: np.asarray(x, dtype='M8[ns]') elif kind == 'datetime': - return lib.convert_timestamps + return lambda x: to_datetime(x, cache=True).to_pydatetime() elif kind == 'string': return lambda x: _unconvert_string_array(x, encoding=encoding) else: # pragma: no cover diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b409cf20e9a09..16665e19985f1 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -16,8 +16,9 @@ import numpy as np from dateutil.relativedelta import relativedelta -from pandas._libs.lib import max_len_string_array, infer_dtype +from pandas._libs.lib import infer_dtype from pandas._libs.tslib import NaT, Timestamp +from pandas._libs.writers import max_len_string_array import pandas as pd from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 66ee7fa98491f..07163615c6ba4 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -23,7 +23,7 @@ from pandas.compat import lrange import pandas.compat as compat -import pandas._libs.lib as lib +from pandas._libs import tslib import pandas.core.common as com from pandas.core.index import Index @@ -52,7 +52,7 @@ def get_pairs(): pairs = [ - (lib.Timestamp, DatetimeConverter), + (tslib.Timestamp, DatetimeConverter), (Period, PeriodConverter), (pydt.datetime, DatetimeConverter), (pydt.date, DatetimeConverter), @@ -312,7 +312,7 @@ def try_parse(values): if isinstance(values, (datetime, pydt.date)): return _dt_to_float_ordinal(values) elif isinstance(values, np.datetime64): - return _dt_to_float_ordinal(lib.Timestamp(values)) + return _dt_to_float_ordinal(tslib.Timestamp(values)) elif isinstance(values, pydt.time): return dates.date2num(values) elif (is_integer(values) or is_float(values)): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index b59dd25ead57f..197a42bdaacbb 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -7,7 +7,6 @@ import pandas as pd from pandas import offsets import pandas.util.testing as tm -from pandas._libs import lib from pandas._libs.tslib import OutOfBoundsDatetime from pandas._libs.tslibs import conversion from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, @@ -537,7 +536,7 @@ def test_datetimeindex_constructor_misc(self): arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] idx2 = DatetimeIndex(arr) - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + arr = [Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', '2005-01-04'] idx3 = DatetimeIndex(arr) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 508c3a73f48c7..974099f1fbbe9 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -29,7 +29,7 @@ from pandas.core.indexes.datetimes import _to_m8 import pandas as pd -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp class TestIndex(Base): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index aedc957ec67da..e59456b8a2d5e 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -19,7 +19,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.base import InvalidIndexError from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas.util.testing as tm diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 3de1c4c982654..0c1bec7a6f1a9 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -13,7 +13,7 @@ import pandas.util.testing as tm import pandas as pd -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.tests.indexes.common import Base diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index cd1685f282bd2..e949772981eb7 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -425,7 +425,7 @@ def test_npy_nat(self): assert ujson.encode(input) == 'null', "Expected null" def test_datetime_units(self): - from pandas._libs.lib import Timestamp + from pandas._libs.tslib import Timestamp val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 8525cb42c2455..bc972076c6a80 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -11,7 +11,7 @@ import pytest import numpy as np -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 1176b1e84e29b..ae35d45591dc5 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -13,7 +13,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas import DataFrame, Index from pandas.compat import parse_date, StringIO, lmap diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index b7d0dd1a3484f..919b357f14236 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -11,7 +11,7 @@ import pytest import numpy as np from pandas._libs.tslibs import parsing -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas as pd import pandas.io.parsers as parsers diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index ec240531925e3..7717102b64fc5 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -5,7 +5,7 @@ from pandas import read_csv, read_table, DataFrame import pandas.core.common as com -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.compat import StringIO from .common import ParserTests diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index 8767055239cd5..195fb4cba2aed 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm from pandas import DataFrame, Index -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.compat import StringIO diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index fbfbad547ce1b..e5c3d6f7d3ee1 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -17,7 +17,7 @@ Categorical) from pandas.core.indexing import IndexingError from pandas.tseries.offsets import BDay -from pandas._libs import tslib, lib +from pandas._libs import tslib from pandas.compat import lrange, range from pandas import compat @@ -2707,7 +2707,7 @@ def test_fancy_getitem(self): assert s['1/2/2009'] == 48 assert s['2009-1-2'] == 48 assert s[datetime(2009, 1, 2)] == 48 - assert s[lib.Timestamp(datetime(2009, 1, 2))] == 48 + assert s[Timestamp(datetime(2009, 1, 2))] == 48 pytest.raises(KeyError, s.__getitem__, '2009-1-3') assert_series_equal(s['3/6/2009':'2009-06-05'], diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 10061204df42a..502f0c3bced61 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from pandas._libs import lib +from pandas._libs import lib, writers as libwriters import pandas.util.testing as tm @@ -12,19 +12,19 @@ class TestMisc(object): def test_max_len_string_array(self): arr = a = np.array(['foo', 'b', np.nan], dtype='object') - assert lib.max_len_string_array(arr) == 3 + assert libwriters.max_len_string_array(arr) == 3 # unicode arr = a.astype('U').astype(object) - assert lib.max_len_string_array(arr) == 3 + assert libwriters.max_len_string_array(arr) == 3 # bytes for python3 arr = a.astype('S').astype(object) - assert lib.max_len_string_array(arr) == 3 + assert libwriters.max_len_string_array(arr) == 3 # raises pytest.raises(TypeError, - lambda: lib.max_len_string_array(arr.astype('U'))) + lambda: libwriters.max_len_string_array(arr.astype('U'))) def test_fast_unique_multiple_list_gen_sort(self): keys = [['p', 'a'], ['n', 'd'], ['a', 's']] diff --git a/setup.py b/setup.py index 721e6f62bd3e4..4d42379eef11b 100755 --- a/setup.py +++ b/setup.py @@ -328,6 +328,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/resolution.pyx', 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/writers.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -616,6 +617,9 @@ def pxd(name): '_libs.window': { 'pyxfile': '_libs/window', 'pxdfiles': ['_libs/skiplist', '_libs/src/util']}, + '_libs.writers': { + 'pyxfile': '_libs/writers', + 'pxdfiles': ['_libs/src/util']}, 'io.sas._sas': { 'pyxfile': 'io/sas/sas'}}