Skip to content

Commit fa28d95

Browse files
Merge remote-tracking branch 'upstream/master' into fixup-bool
2 parents de2d4f6 + 1cc40fa commit fa28d95

File tree

192 files changed

+3084
-1313
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

192 files changed

+3084
-1313
lines changed

.github/workflows/ci.yml

+9-11
Original file line numberDiff line numberDiff line change
@@ -155,35 +155,33 @@ jobs:
155155
run: |
156156
source activate pandas-dev
157157
158-
pytest pandas/tests/frame/methods
159-
pytest pandas/tests/frame/test_constructors.py
160-
pytest pandas/tests/frame/test_*
161-
pytest pandas/tests/frame/test_reductions.py
158+
pytest pandas/tests/frame/
162159
pytest pandas/tests/reductions/
163160
pytest pandas/tests/generic/test_generic.py
164161
pytest pandas/tests/arithmetic/
165162
pytest pandas/tests/groupby/
166163
pytest pandas/tests/resample/
167164
pytest pandas/tests/reshape/merge
168-
169-
pytest pandas/tests/series/methods
170-
pytest pandas/tests/series/test_*
165+
pytest pandas/tests/series/
171166
172167
# indexing subset (temporary since other tests don't pass yet)
173-
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
174-
pytest pandas/tests/frame/indexing/test_where.py
175-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
176-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
177168
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
178169
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179170
180171
pytest pandas/tests/api/
172+
pytest pandas/tests/arrays/
181173
pytest pandas/tests/base/
182174
pytest pandas/tests/computation/
183175
pytest pandas/tests/config/
184176
pytest pandas/tests/dtypes/
185177
pytest pandas/tests/generic/
186178
pytest pandas/tests/indexes/
179+
pytest pandas/tests/io/test_* -m "not slow and not clipboard"
180+
pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
181+
pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
182+
pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
183+
pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
184+
pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
187185
pytest pandas/tests/libs/
188186
pytest pandas/tests/plotting/
189187
pytest pandas/tests/scalar/

.pre-commit-config.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ repos:
1616
- id: codespell
1717
types_or: [python, rst, markdown]
1818
files: ^(pandas|doc)/
19-
exclude: ^pandas/tests/
2019
- repo: https://github.com/pre-commit/pre-commit-hooks
2120
rev: v3.4.0
2221
hooks:
@@ -95,7 +94,7 @@ repos:
9594
entry: python scripts/check_for_inconsistent_pandas_namespace.py
9695
language: python
9796
types: [python]
98-
files: ^pandas/tests/frame/
97+
files: ^pandas/tests/
9998
- id: incorrect-code-directives
10099
name: Check for incorrect code block or IPython directives
101100
language: pygrep

doc/source/_static/css/pandas.css

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
:root {
44
/* Use softer blue from bootstrap's default info color */
5-
--color-info: 23, 162, 184;
5+
--pst-color-info: 23, 162, 184;
66
}
77

88
/* Getting started index page */

doc/source/whatsnew/v1.3.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,7 @@ Reshaping
591591
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``datetime64`` and ``timedelta64`` dtypes (:issue:`39574`)
592592
- Bug in :meth:`DataFrame.pivot_table` returning a ``MultiIndex`` for a single value when operating on an empty ``DataFrame`` (:issue:`13483`)
593593
- Allow :class:`Index` to be passed to the :func:`numpy.all` function (:issue:`40180`)
594-
-
594+
- Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a ``MultiIndex`` (:issue:`36991`)
595595

596596
Sparse
597597
^^^^^^
@@ -613,7 +613,7 @@ Other
613613
- Bug in :func:`pandas.api.types.infer_dtype` not recognizing Series, Index or array with a period dtype (:issue:`23553`)
614614
- Bug in :func:`pandas.api.types.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. It will now return ``"unknown-array"`` instead of raising (:issue:`37367`)
615615
- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
616-
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
616+
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`, :issue:`40334`)
617617
- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`)
618618
- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`39761`)
619619
- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)

environment.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ channels:
33
- conda-forge
44
dependencies:
55
# required
6-
- numpy>=1.16.5, <1.20 # gh-39513
6+
- numpy>=1.16.5
77
- python=3
88
- python-dateutil>=2.7.3
99
- pytz
@@ -113,5 +113,5 @@ dependencies:
113113
- tabulate>=0.8.3 # DataFrame.to_markdown
114114
- natsort # DataFrame.sort_values
115115
- pip:
116-
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103
116+
- git+https://github.com/pydata/pydata-sphinx-theme.git@master
117117
- numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI

pandas/_libs/algos_take_helper.pxi.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,10 @@ ctypedef fused take_t:
230230
object
231231

232232

233-
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
233+
cdef _take_2d(ndarray[take_t, ndim=2] values, ndarray[intp_t, ndim=2] idx):
234234
cdef:
235235
Py_ssize_t i, j, N, K
236-
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
236+
ndarray[intp_t, ndim=2, cast=True] indexer = idx
237237
ndarray[take_t, ndim=2] result
238238

239239
N, K = (<object>values).shape

pandas/_libs/internals.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ from pandas._libs.util cimport is_integer_object
2525

2626

2727
@cython.final
28+
@cython.freelist(32)
2829
cdef class BlockPlacement:
2930
# __slots__ = '_as_slice', '_as_array', '_len'
3031
cdef:

pandas/_libs/parsers.pyx

+17-114
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ from cpython.ref cimport (
3636
from cpython.unicode cimport (
3737
PyUnicode_AsUTF8String,
3838
PyUnicode_Decode,
39+
PyUnicode_DecodeUTF8,
3940
)
4041

4142

@@ -321,7 +322,6 @@ cdef class TextReader:
321322
bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns
322323
uint64_t parser_start
323324
list clocks
324-
char *c_encoding
325325
const char *encoding_errors
326326
kh_str_starts_t *false_set
327327
kh_str_starts_t *true_set
@@ -337,7 +337,7 @@ cdef class TextReader:
337337
object skiprows
338338
object dtype
339339
object usecols
340-
list dtype_cast_order
340+
list dtype_cast_order # list[np.dtype]
341341
set unnamed_cols
342342
set noconvert
343343

@@ -381,7 +381,6 @@ cdef class TextReader:
381381
encoding_errors=b"strict"):
382382

383383
# set encoding for native Python and C library
384-
self.c_encoding = NULL
385384
if isinstance(encoding_errors, str):
386385
encoding_errors = encoding_errors.encode("utf-8")
387386
Py_INCREF(encoding_errors)
@@ -638,7 +637,6 @@ cdef class TextReader:
638637
char *word
639638
object name, old_name
640639
uint64_t hr, data_line = 0
641-
StringPath path = _string_path(self.c_encoding)
642640
list header = []
643641
set unnamed_cols = set()
644642

@@ -678,8 +676,8 @@ cdef class TextReader:
678676
for i in range(field_count):
679677
word = self.parser.words[start + i]
680678

681-
name = PyUnicode_Decode(word, strlen(word),
682-
self.c_encoding, self.encoding_errors)
679+
name = PyUnicode_DecodeUTF8(word, strlen(word),
680+
self.encoding_errors)
683681

684682
# We use this later when collecting placeholder names.
685683
old_name = name
@@ -987,8 +985,7 @@ cdef class TextReader:
987985
f"for column {name} - only the converter will "
988986
f"be used"), ParserWarning,
989987
stacklevel=5)
990-
results[i] = _apply_converter(conv, self.parser, i, start, end,
991-
self.c_encoding)
988+
results[i] = _apply_converter(conv, self.parser, i, start, end)
992989
continue
993990

994991
# Collect the list of NaN values associated with the column.
@@ -1102,8 +1099,7 @@ cdef class TextReader:
11021099
# TODO: I suspect that _categorical_convert could be
11031100
# optimized when dtype is an instance of CategoricalDtype
11041101
codes, cats, na_count = _categorical_convert(
1105-
self.parser, i, start, end, na_filter,
1106-
na_hashset, self.c_encoding)
1102+
self.parser, i, start, end, na_filter, na_hashset)
11071103

11081104
# Method accepts list of strings, not encoded ones.
11091105
true_values = [x.decode() for x in self.true_values]
@@ -1199,14 +1195,8 @@ cdef class TextReader:
11991195
cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end,
12001196
bint na_filter, kh_str_starts_t *na_hashset):
12011197

1202-
cdef StringPath path = _string_path(self.c_encoding)
1203-
1204-
if path == UTF8:
1205-
return _string_box_utf8(self.parser, i, start, end, na_filter,
1206-
na_hashset, self.encoding_errors)
1207-
elif path == ENCODED:
1208-
return _string_box_decode(self.parser, i, start, end,
1209-
na_filter, na_hashset, self.c_encoding)
1198+
return _string_box_utf8(self.parser, i, start, end, na_filter,
1199+
na_hashset, self.encoding_errors)
12101200

12111201
def _get_converter(self, i, name):
12121202
if self.converters is None:
@@ -1336,18 +1326,6 @@ def _maybe_upcast(arr):
13361326
return arr
13371327

13381328

1339-
cdef enum StringPath:
1340-
UTF8
1341-
ENCODED
1342-
1343-
1344-
# factored out logic to pick string converter
1345-
cdef inline StringPath _string_path(char *encoding):
1346-
if encoding != NULL and encoding != b"utf-8":
1347-
return ENCODED
1348-
return UTF8
1349-
1350-
13511329
# ----------------------------------------------------------------------
13521330
# Type conversions / inference support code
13531331

@@ -1406,68 +1384,10 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
14061384
return result, na_count
14071385

14081386

1409-
cdef _string_box_decode(parser_t *parser, int64_t col,
1410-
int64_t line_start, int64_t line_end,
1411-
bint na_filter, kh_str_starts_t *na_hashset,
1412-
char *encoding):
1413-
cdef:
1414-
int na_count = 0
1415-
Py_ssize_t i, size, lines
1416-
coliter_t it
1417-
const char *word = NULL
1418-
ndarray[object] result
1419-
1420-
int ret = 0
1421-
kh_strbox_t *table
1422-
1423-
char *errors = "strict"
1424-
1425-
object pyval
1426-
1427-
object NA = na_values[np.object_]
1428-
khiter_t k
1429-
1430-
table = kh_init_strbox()
1431-
lines = line_end - line_start
1432-
result = np.empty(lines, dtype=np.object_)
1433-
coliter_setup(&it, parser, col, line_start)
1434-
1435-
for i in range(lines):
1436-
COLITER_NEXT(it, word)
1437-
1438-
if na_filter:
1439-
if kh_get_str_starts_item(na_hashset, word):
1440-
# in the hash table
1441-
na_count += 1
1442-
result[i] = NA
1443-
continue
1444-
1445-
k = kh_get_strbox(table, word)
1446-
1447-
# in the hash table
1448-
if k != table.n_buckets:
1449-
# this increments the refcount, but need to test
1450-
pyval = <object>table.vals[k]
1451-
else:
1452-
# box it. new ref?
1453-
size = strlen(word)
1454-
pyval = PyUnicode_Decode(word, size, encoding, errors)
1455-
1456-
k = kh_put_strbox(table, word, &ret)
1457-
table.vals[k] = <PyObject *>pyval
1458-
1459-
result[i] = pyval
1460-
1461-
kh_destroy_strbox(table)
1462-
1463-
return result, na_count
1464-
1465-
14661387
@cython.boundscheck(False)
14671388
cdef _categorical_convert(parser_t *parser, int64_t col,
14681389
int64_t line_start, int64_t line_end,
1469-
bint na_filter, kh_str_starts_t *na_hashset,
1470-
char *encoding):
1390+
bint na_filter, kh_str_starts_t *na_hashset):
14711391
"Convert column data into codes, categories"
14721392
cdef:
14731393
int na_count = 0
@@ -1480,7 +1400,6 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
14801400
int64_t current_category = 0
14811401

14821402
char *errors = "strict"
1483-
StringPath path = _string_path(encoding)
14841403

14851404
int ret = 0
14861405
kh_str_t *table
@@ -1516,16 +1435,9 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
15161435

15171436
# parse and box categories to python strings
15181437
result = np.empty(table.n_occupied, dtype=np.object_)
1519-
if path == ENCODED:
1520-
for k in range(table.n_buckets):
1521-
if kh_exist_str(table, k):
1522-
size = strlen(table.keys[k])
1523-
result[table.vals[k]] = PyUnicode_Decode(
1524-
table.keys[k], size, encoding, errors)
1525-
elif path == UTF8:
1526-
for k in range(table.n_buckets):
1527-
if kh_exist_str(table, k):
1528-
result[table.vals[k]] = PyUnicode_FromString(table.keys[k])
1438+
for k in range(table.n_buckets):
1439+
if kh_exist_str(table, k):
1440+
result[table.vals[k]] = PyUnicode_FromString(table.keys[k])
15291441

15301442
kh_destroy_str(table)
15311443
return np.asarray(codes), result, na_count
@@ -2064,13 +1976,11 @@ for k in list(na_values):
20641976

20651977

20661978
cdef _apply_converter(object f, parser_t *parser, int64_t col,
2067-
int64_t line_start, int64_t line_end,
2068-
char* c_encoding):
1979+
int64_t line_start, int64_t line_end):
20691980
cdef:
20701981
Py_ssize_t i, lines
20711982
coliter_t it
20721983
const char *word = NULL
2073-
char *errors = "strict"
20741984
ndarray[object] result
20751985
object val
20761986

@@ -2079,17 +1989,10 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col,
20791989

20801990
coliter_setup(&it, parser, col, line_start)
20811991

2082-
if c_encoding == NULL or c_encoding == b'utf-8':
2083-
for i in range(lines):
2084-
COLITER_NEXT(it, word)
2085-
val = PyUnicode_FromString(word)
2086-
result[i] = f(val)
2087-
else:
2088-
for i in range(lines):
2089-
COLITER_NEXT(it, word)
2090-
val = PyUnicode_Decode(word, strlen(word),
2091-
c_encoding, errors)
2092-
result[i] = f(val)
1992+
for i in range(lines):
1993+
COLITER_NEXT(it, word)
1994+
val = PyUnicode_FromString(word)
1995+
result[i] = f(val)
20931996

20941997
return lib.maybe_convert_objects(result)
20951998

pandas/_testing/asserters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -976,8 +976,8 @@ def assert_series_equal(
976976
left_values = left._values
977977
right_values = right._values
978978
# Only check exact if dtype is numeric
979-
if is_extension_array_dtype(left_values) and is_extension_array_dtype(
980-
right_values
979+
if isinstance(left_values, ExtensionArray) and isinstance(
980+
right_values, ExtensionArray
981981
):
982982
assert_extension_array_equal(
983983
left_values,

pandas/_typing.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from pandas.core.dtypes.dtypes import ExtensionDtype
4848

4949
from pandas import Interval
50-
from pandas.core.arrays.base import ExtensionArray # noqa: F401
50+
from pandas.core.arrays.base import ExtensionArray
5151
from pandas.core.frame import DataFrame
5252
from pandas.core.generic import NDFrame # noqa: F401
5353
from pandas.core.groupby.generic import (
@@ -74,8 +74,8 @@
7474

7575
# array-like
7676

77-
AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
78-
ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray)
77+
ArrayLike = Union["ExtensionArray", np.ndarray]
78+
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
7979

8080
# scalars
8181

0 commit comments

Comments
 (0)