Skip to content

Commit 36ec478

Browse files
authored
Merge branch 'master' into issue-19206
2 parents 5eb3ae3 + d3f7d2a commit 36ec478

35 files changed

+1182
-1058
lines changed

doc/source/api.rst

-1
Original file line number | Diff line number | Diff line change
@@ -1617,7 +1617,6 @@ IntervalIndex Components
16171617
IntervalIndex.from_arrays
16181618
IntervalIndex.from_tuples
16191619
IntervalIndex.from_breaks
1620-
IntervalIndex.from_intervals
16211620
IntervalIndex.contains
16221621
IntervalIndex.left
16231622
IntervalIndex.right

doc/source/whatsnew/v0.23.0.txt

+2-2
Original file line number | Diff line number | Diff line change
@@ -207,9 +207,8 @@ Other Enhancements
207207
:func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas
208208
to register custom accessors like ``.cat`` on pandas objects. See
209209
:ref:`Registering Custom Accessors <developer.register-accessors>` for more (:issue:`14781`).
210-
211-
212210
- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
211+
- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)
213212

214213
.. _whatsnew_0230.api_breaking:
215214

@@ -329,6 +328,7 @@ Deprecations
329328
- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`).
330329
- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`)
331330
- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`).
331+
- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
332332

333333

334334
.. _whatsnew_0230.prior_deprecations:

pandas/_libs/interval.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ cdef class Interval(IntervalMixin):
109109
cut, qcut : Convert arrays of continuous data into Categoricals/Series of
110110
Interval.
111111
"""
112+
_typ = "interval"
112113

113114
cdef readonly object left
114115
"""Left bound for the interval"""

pandas/_libs/lib.pyx

-1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
4141
PyDateTime_IMPORT
4242

4343
from tslib import NaT, Timestamp, Timedelta, array_to_datetime
44-
from interval import Interval
4544
from missing cimport checknull
4645

4746

pandas/_libs/parsers.pyx

+17-97
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,8 @@ import warnings
77

88
from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE
99

10-
from libc.stdio cimport fopen, fclose
11-
from libc.stdlib cimport malloc, free
12-
from libc.string cimport strncpy, strlen, strcmp, strcasecmp
10+
from libc.stdlib cimport free
11+
from libc.string cimport strncpy, strlen, strcasecmp
1312

1413
cimport cython
1514
from cython cimport Py_ssize_t
@@ -27,9 +26,6 @@ cdef extern from "Python.h":
2726
object PyUnicode_Decode(char *v, Py_ssize_t size, char *encoding,
2827
char *errors)
2928

30-
cdef extern from "stdlib.h":
31-
void memcpy(void *dst, void *src, size_t n)
32-
3329

3430
import numpy as np
3531
cimport numpy as cnp
@@ -50,7 +46,7 @@ from khash cimport (
5046

5147
import pandas.compat as compat
5248
from pandas.core.dtypes.common import (
53-
is_categorical_dtype, CategoricalDtype,
49+
is_categorical_dtype,
5450
is_integer_dtype, is_float_dtype,
5551
is_bool_dtype, is_object_dtype,
5652
is_datetime64_dtype,
@@ -90,9 +86,6 @@ try:
9086
except NameError:
9187
basestring = str
9288

93-
cdef extern from "src/numpy_helper.h":
94-
void transfer_object_column(char *dst, char *src, size_t stride,
95-
size_t length)
9689

9790
cdef extern from "parser/tokenizer.h":
9891

@@ -232,8 +225,6 @@ cdef extern from "parser/tokenizer.h":
232225

233226
int parser_trim_buffers(parser_t *self)
234227

235-
void debug_print_parser(parser_t *self)
236-
237228
int tokenize_all_rows(parser_t *self) nogil
238229
int tokenize_nrows(parser_t *self, size_t nrows) nogil
239230

@@ -249,7 +240,6 @@ cdef extern from "parser/tokenizer.h":
249240
double round_trip(const char *p, char **q, char decimal, char sci,
250241
char tsep, int skip_trailing) nogil
251242

252-
int to_longlong(char *item, long long *p_value) nogil
253243
int to_boolean(const char *item, uint8_t *val) nogil
254244

255245

@@ -875,9 +865,6 @@ cdef class TextReader:
875865

876866
return header, field_count
877867

878-
cdef _implicit_index_count(self):
879-
pass
880-
881868
def read(self, rows=None):
882869
"""
883870
rows=None --> read all rows
@@ -997,9 +984,6 @@ cdef class TextReader:
997984

998985
return columns
999986

1000-
def debug_print(self):
1001-
debug_print_parser(self.parser)
1002-
1003987
cdef _start_clock(self):
1004988
self.clocks.append(time.time())
1005989

@@ -1346,6 +1330,7 @@ cdef class TextReader:
13461330
else:
13471331
return None
13481332

1333+
13491334
cdef object _true_values = [b'True', b'TRUE', b'true']
13501335
cdef object _false_values = [b'False', b'FALSE', b'false']
13511336

@@ -1375,21 +1360,6 @@ cdef asbytes(object o):
13751360
_NA_VALUES = _ensure_encoded(list(com._NA_VALUES))
13761361

13771362

1378-
def _is_file_like(obj):
1379-
if PY3:
1380-
import io
1381-
if isinstance(obj, io.TextIOWrapper):
1382-
raise ParserError('Cannot handle open unicode files (yet)')
1383-
1384-
# BufferedReader is a byte reader for Python 3
1385-
file = io.BufferedReader
1386-
else:
1387-
import __builtin__
1388-
file = __builtin__.file
1389-
1390-
return isinstance(obj, (basestring, file))
1391-
1392-
13931363
def _maybe_upcast(arr):
13941364
"""
13951365
@@ -1479,6 +1449,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col,
14791449

14801450
return result, na_count
14811451

1452+
14821453
cdef _string_box_utf8(parser_t *parser, int64_t col,
14831454
int64_t line_start, int64_t line_end,
14841455
bint na_filter, kh_str_t *na_hashset):
@@ -1532,6 +1503,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
15321503

15331504
return result, na_count
15341505

1506+
15351507
cdef _string_box_decode(parser_t *parser, int64_t col,
15361508
int64_t line_start, int64_t line_end,
15371509
bint na_filter, kh_str_t *na_hashset,
@@ -1662,6 +1634,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
16621634
kh_destroy_str(table)
16631635
return np.asarray(codes), result, na_count
16641636

1637+
16651638
cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
16661639
int64_t line_end, int64_t width):
16671640
cdef:
@@ -1679,6 +1652,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
16791652

16801653
return result
16811654

1655+
16821656
cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
16831657
int64_t line_start, int64_t line_end,
16841658
size_t width, char *data) nogil:
@@ -1694,10 +1668,12 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
16941668
strncpy(data, word, width)
16951669
data += width
16961670

1671+
16971672
cdef char* cinf = b'inf'
16981673
cdef char* cposinf = b'+inf'
16991674
cdef char* cneginf = b'-inf'
17001675

1676+
17011677
cdef _try_double(parser_t *parser, int64_t col,
17021678
int64_t line_start, int64_t line_end,
17031679
bint na_filter, kh_str_t *na_hashset, object na_flist):
@@ -1738,6 +1714,7 @@ cdef _try_double(parser_t *parser, int64_t col,
17381714
return None, None
17391715
return result, na_count
17401716

1717+
17411718
cdef inline int _try_double_nogil(parser_t *parser,
17421719
double (*double_converter)(
17431720
const char *, char **, char,
@@ -1808,6 +1785,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
18081785

18091786
return 0
18101787

1788+
18111789
cdef _try_uint64(parser_t *parser, int64_t col,
18121790
int64_t line_start, int64_t line_end,
18131791
bint na_filter, kh_str_t *na_hashset):
@@ -1843,6 +1821,7 @@ cdef _try_uint64(parser_t *parser, int64_t col,
18431821

18441822
return result
18451823

1824+
18461825
cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
18471826
int64_t line_start,
18481827
int64_t line_end, bint na_filter,
@@ -1881,6 +1860,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
18811860

18821861
return 0
18831862

1863+
18841864
cdef _try_int64(parser_t *parser, int64_t col,
18851865
int64_t line_start, int64_t line_end,
18861866
bint na_filter, kh_str_t *na_hashset):
@@ -1909,6 +1889,7 @@ cdef _try_int64(parser_t *parser, int64_t col,
19091889

19101890
return result, na_count
19111891

1892+
19121893
cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
19131894
int64_t line_start,
19141895
int64_t line_end, bint na_filter,
@@ -1948,69 +1929,6 @@ cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
19481929

19491930
return 0
19501931

1951-
cdef _try_bool(parser_t *parser, int64_t col,
1952-
int64_t line_start, int64_t line_end,
1953-
bint na_filter, kh_str_t *na_hashset):
1954-
cdef:
1955-
int na_count
1956-
Py_ssize_t lines = line_end - line_start
1957-
uint8_t *data
1958-
cnp.ndarray[cnp.uint8_t, ndim=1] result
1959-
1960-
uint8_t NA = na_values[np.bool_]
1961-
1962-
result = np.empty(lines)
1963-
data = <uint8_t *> result.data
1964-
1965-
with nogil:
1966-
error = _try_bool_nogil(parser, col, line_start,
1967-
line_end, na_filter,
1968-
na_hashset, NA, data,
1969-
&na_count)
1970-
if error != 0:
1971-
return None, None
1972-
return result.view(np.bool_), na_count
1973-
1974-
cdef inline int _try_bool_nogil(parser_t *parser, int64_t col,
1975-
int64_t line_start,
1976-
int64_t line_end, bint na_filter,
1977-
const kh_str_t *na_hashset, uint8_t NA,
1978-
uint8_t *data, int *na_count) nogil:
1979-
cdef:
1980-
int error
1981-
Py_ssize_t i, lines = line_end - line_start
1982-
coliter_t it
1983-
const char *word = NULL
1984-
khiter_t k
1985-
na_count[0] = 0
1986-
1987-
coliter_setup(&it, parser, col, line_start)
1988-
1989-
if na_filter:
1990-
for i in range(lines):
1991-
COLITER_NEXT(it, word)
1992-
1993-
k = kh_get_str(na_hashset, word)
1994-
# in the hash table
1995-
if k != na_hashset.n_buckets:
1996-
na_count[0] += 1
1997-
data[0] = NA
1998-
data += 1
1999-
continue
2000-
2001-
error = to_boolean(word, data)
2002-
if error != 0:
2003-
return error
2004-
data += 1
2005-
else:
2006-
for i in range(lines):
2007-
COLITER_NEXT(it, word)
2008-
2009-
error = to_boolean(word, data)
2010-
if error != 0:
2011-
return error
2012-
data += 1
2013-
return 0
20141932

20151933
cdef _try_bool_flex(parser_t *parser, int64_t col,
20161934
int64_t line_start, int64_t line_end,
@@ -2039,6 +1957,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col,
20391957
return None, None
20401958
return result.view(np.bool_), na_count
20411959

1960+
20421961
cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col,
20431962
int64_t line_start,
20441963
int64_t line_end, bint na_filter,
@@ -2131,6 +2050,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
21312050

21322051
return table
21332052

2053+
21342054
cdef kh_float64_t* kset_float64_from_list(values) except NULL:
21352055
# caller takes responsibility for freeing the hash table
21362056
cdef:

pandas/_libs/src/inference.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ cpdef bint is_decimal(object obj):
3737

3838

3939
cpdef bint is_interval(object obj):
40-
return isinstance(obj, Interval)
40+
return getattr(obj, '_typ', '_typ') == 'interval'
4141

4242

4343
cpdef bint is_period(object val):

pandas/_libs/src/numpy_helper.h

-16
Original file line number | Diff line number | Diff line change
@@ -75,22 +75,6 @@ PANDAS_INLINE PyObject* char_to_string(char* data) {
7575
#endif
7676
}
7777

78-
void transfer_object_column(char* dst, char* src, size_t stride,
79-
size_t length) {
80-
size_t i;
81-
size_t sz = sizeof(PyObject*);
82-
83-
for (i = 0; i < length; ++i) {
84-
// uninitialized data
85-
86-
// Py_XDECREF(*((PyObject**) dst));
87-
88-
memcpy(dst, src, sz);
89-
Py_INCREF(*((PyObject**)dst));
90-
src += sz;
91-
dst += stride;
92-
}
93-
}
9478

9579
void set_array_not_contiguous(PyArrayObject* ao) {
9680
ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS);

pandas/_libs/src/parser/.gitignore

-2
This file was deleted.

pandas/_libs/src/parser/Makefile

-13
This file was deleted.

pandas/_libs/src/parser/tokenizer.c

-15
Original file line number | Diff line number | Diff line change
@@ -1317,21 +1317,6 @@ int parser_trim_buffers(parser_t *self) {
13171317
return 0;
13181318
}
13191319

1320-
void debug_print_parser(parser_t *self) {
1321-
int64_t j, line;
1322-
char *token;
1323-
1324-
for (line = 0; line < self->lines; ++line) {
1325-
printf("(Parsed) Line %lld: ", (long long)line);
1326-
1327-
for (j = 0; j < self->line_fields[j]; ++j) {
1328-
token = self->words[j + self->line_start[line]];
1329-
printf("%s ", token);
1330-
}
1331-
printf("\n");
1332-
}
1333-
}
1334-
13351320
/*
13361321
nrows : number of rows to tokenize (or until reach EOF)
13371322
all : tokenize all the data vs. certain number of rows

0 commit comments

Comments
 (0)