Skip to content

Commit 89430f1

Browse files
Merge remote-tracking branch 'upstream/master' into pandas.io.sas.sasreader
2 parents 0d8cf0e + 4206fd4 commit 89430f1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1532
-1092
lines changed

.pre-commit-config.yaml

+12
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,15 @@ repos:
1616
- id: isort
1717
language: python_venv
1818
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
19+
- repo: https://github.com/pre-commit/mirrors-mypy
20+
rev: v0.730
21+
hooks:
22+
- id: mypy
23+
# We run mypy over all files because of:
24+
# * changes in type definitions may affect non-touched files.
25+
# * Running it with `mypy pandas` and the filenames will lead to
26+
# spurious duplicate module errors,
27+
# see also https://github.com/pre-commit/mirrors-mypy/issues/5
28+
pass_filenames: false
29+
args:
30+
- pandas

ci/deps/azure-36-locale.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- cython>=0.29.13
1010
- pytest>=5.0.1
1111
- pytest-xdist>=1.21
12+
- pytest-asyncio
1213
- hypothesis>=3.58.0
1314
- pytest-azurepipelines
1415

ci/deps/azure-37-locale.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- cython>=0.29.13
99
- pytest>=5.0.1
1010
- pytest-xdist>=1.21
11+
- pytest-asyncio
1112
- hypothesis>=3.58.0
1213
- pytest-azurepipelines
1314

doc/source/development/contributing.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,7 @@ some common prefixes along with general guidelines for when to use them:
13631363
* TST: Additions/updates to tests
13641364
* BLD: Updates to the build process/scripts
13651365
* PERF: Performance improvement
1366+
* TYP: Type annotations
13661367
* CLN: Code cleanup
13671368

13681369
The following defines how a commit message should be structured. Please reference the

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1016,6 +1016,7 @@ Other
10161016
- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
10171017
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
10181018
- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
1019+
- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used to refer to column names that are not valid Python identifiers, such as names that start with a digit, are Python keywords, or contain single-character operators. (:issue:`27017`)
10191020
- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`)
10201021
- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`)
10211022
- Fix :class:`AbstractHolidayCalendar` to return correct results for

environment.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ dependencies:
5555
- pytest>=5.0.1
5656
- pytest-cov
5757
- pytest-xdist>=1.21
58+
- pytest-asyncio
5859

5960
# downstream tests
6061
- seaborn
@@ -70,7 +71,7 @@ dependencies:
7071
- blosc
7172
- bottleneck>=1.2.1
7273
- ipykernel
73-
- ipython>=5.6.0,<=7.10.1 # see gh-30527
74+
- ipython>=7.11.1
7475
- jinja2 # pandas.Styler
7576
- matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot
7677
- numexpr>=2.6.8

pandas/_libs/src/ujson/python/objToJSON.c

+17-43
Original file line numberDiff line numberDiff line change
@@ -241,65 +241,39 @@ static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) {
241241
static PyObject *get_values(PyObject *obj) {
242242
PyObject *values = NULL;
243243

244-
values = PyObject_GetAttrString(obj, "values");
245244
PRINTMARK();
246245

247-
if (values && !PyArray_CheckExact(values)) {
248-
249-
if (PyObject_HasAttrString(values, "to_numpy")) {
250-
values = PyObject_CallMethod(values, "to_numpy", NULL);
251-
}
252-
253-
if (PyObject_HasAttrString(values, "values")) {
254-
PyObject *subvals = get_values(values);
255-
PyErr_Clear();
256-
PRINTMARK();
257-
// subvals are sometimes missing a dimension
258-
if (subvals) {
259-
PyArrayObject *reshape = (PyArrayObject *)subvals;
260-
PyObject *shape = PyObject_GetAttrString(obj, "shape");
261-
PyArray_Dims dims;
262-
PRINTMARK();
263-
264-
if (!shape || !PyArray_IntpConverter(shape, &dims)) {
265-
subvals = NULL;
266-
} else {
267-
subvals = PyArray_Newshape(reshape, &dims, NPY_ANYORDER);
268-
PyDimMem_FREE(dims.ptr);
269-
}
270-
Py_DECREF(reshape);
271-
Py_XDECREF(shape);
272-
}
273-
Py_DECREF(values);
274-
values = subvals;
275-
} else {
276-
PRINTMARK();
277-
Py_DECREF(values);
278-
values = NULL;
279-
}
280-
}
281-
282-
if (!values && PyObject_HasAttrString(obj, "_internal_get_values")) {
246+
if (PyObject_HasAttrString(obj, "_internal_get_values")) {
283247
PRINTMARK();
284248
values = PyObject_CallMethod(obj, "_internal_get_values", NULL);
285-
if (values && !PyArray_CheckExact(values)) {
249+
250+
if (values == NULL) {
251+
// Clear so we can subsequently try another method
252+
PyErr_Clear();
253+
} else if (!PyArray_CheckExact(values)) {
254+
// Didn't get a numpy array, so keep trying
286255
PRINTMARK();
287256
Py_DECREF(values);
288257
values = NULL;
289258
}
290259
}
291260

292-
if (!values && PyObject_HasAttrString(obj, "get_block_values")) {
261+
if ((values == NULL) && PyObject_HasAttrString(obj, "get_block_values")) {
293262
PRINTMARK();
294263
values = PyObject_CallMethod(obj, "get_block_values", NULL);
295-
if (values && !PyArray_CheckExact(values)) {
264+
265+
if (values == NULL) {
266+
// Clear so we can subsequently try another method
267+
PyErr_Clear();
268+
} else if (!PyArray_CheckExact(values)) {
269+
// Didn't get a numpy array, so keep trying
296270
PRINTMARK();
297271
Py_DECREF(values);
298272
values = NULL;
299273
}
300274
}
301275

302-
if (!values) {
276+
if (values == NULL) {
303277
PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj));
304278
PyObject *repr;
305279
PRINTMARK();
@@ -435,8 +409,8 @@ static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
435409
}
436410

437411
/* JSON callback. returns a char* and mutates the pointer to *len */
438-
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), JSONTypeContext *tc,
439-
size_t *len) {
412+
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
413+
JSONTypeContext *tc, size_t *len) {
440414
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
441415
return int64ToIso(GET_TC(tc)->longValue, base, len);
442416
}

pandas/core/arrays/boolean.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ class BooleanDtype(ExtensionDtype):
6262
BooleanDtype
6363
"""
6464

65+
name = "boolean"
66+
6567
@property
6668
def na_value(self) -> "Scalar":
6769
"""
@@ -81,19 +83,6 @@ def type(self) -> Type:
8183
def kind(self) -> str:
8284
return "b"
8385

84-
@property
85-
def name(self) -> str:
86-
"""
87-
The alias for BooleanDtype is ``'boolean'``.
88-
"""
89-
return "boolean"
90-
91-
@classmethod
92-
def construct_from_string(cls, string: str) -> ExtensionDtype:
93-
if string == "boolean":
94-
return cls()
95-
return super().construct_from_string(string)
96-
9786
@classmethod
9887
def construct_array_type(cls) -> "Type[BooleanArray]":
9988
return BooleanArray

pandas/core/arrays/period.py

+47-27
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
is_datetime64_dtype,
3030
is_float_dtype,
3131
is_list_like,
32+
is_object_dtype,
3233
is_period_dtype,
3334
pandas_dtype,
3435
)
@@ -41,6 +42,7 @@
4142
)
4243
from pandas.core.dtypes.missing import isna, notna
4344

45+
from pandas.core import ops
4446
import pandas.core.algorithms as algos
4547
from pandas.core.arrays import datetimelike as dtl
4648
import pandas.core.common as com
@@ -92,22 +94,44 @@ def wrapper(self, other):
9294
self._check_compatible_with(other)
9395

9496
result = ordinal_op(other.ordinal)
95-
elif isinstance(other, cls):
96-
self._check_compatible_with(other)
97-
98-
result = ordinal_op(other.asi8)
99-
100-
mask = self._isnan | other._isnan
101-
if mask.any():
102-
result[mask] = nat_result
10397

104-
return result
10598
elif other is NaT:
10699
result = np.empty(len(self.asi8), dtype=bool)
107100
result.fill(nat_result)
108-
else:
101+
102+
elif not is_list_like(other):
109103
return invalid_comparison(self, other, op)
110104

105+
else:
106+
if isinstance(other, list):
107+
# TODO: could use pd.Index to do inference?
108+
other = np.array(other)
109+
110+
if not isinstance(other, (np.ndarray, cls)):
111+
return invalid_comparison(self, other, op)
112+
113+
if is_object_dtype(other):
114+
with np.errstate(all="ignore"):
115+
result = ops.comp_method_OBJECT_ARRAY(
116+
op, self.astype(object), other
117+
)
118+
o_mask = isna(other)
119+
120+
elif not is_period_dtype(other):
121+
# e.g. is_timedelta64_dtype(other)
122+
return invalid_comparison(self, other, op)
123+
124+
else:
125+
assert isinstance(other, cls), type(other)
126+
127+
self._check_compatible_with(other)
128+
129+
result = ordinal_op(other.asi8)
130+
o_mask = other._isnan
131+
132+
if o_mask.any():
133+
result[o_mask] = nat_result
134+
111135
if self._hasnans:
112136
result[self._isnan] = nat_result
113137

@@ -215,12 +239,7 @@ def __init__(self, values, freq=None, dtype=None, copy=False):
215239

216240
if isinstance(values, type(self)):
217241
if freq is not None and freq != values.freq:
218-
msg = DIFFERENT_FREQ.format(
219-
cls=type(self).__name__,
220-
own_freq=values.freq.freqstr,
221-
other_freq=freq.freqstr,
222-
)
223-
raise IncompatibleFrequency(msg)
242+
raise raise_on_incompatible(values, freq)
224243
values, freq = values._data, values.freq
225244

226245
values = np.array(values, dtype="int64", copy=copy)
@@ -323,7 +342,7 @@ def _check_compatible_with(self, other):
323342
if other is NaT:
324343
return
325344
if self.freqstr != other.freqstr:
326-
_raise_on_incompatible(self, other)
345+
raise raise_on_incompatible(self, other)
327346

328347
# --------------------------------------------------------------------
329348
# Data / Attributes
@@ -332,7 +351,7 @@ def _check_compatible_with(self, other):
332351
def dtype(self):
333352
return self._dtype
334353

335-
# read-only property overwriting read/write
354+
# error: Read-only property cannot override read-write property [misc]
336355
@property # type: ignore
337356
def freq(self):
338357
"""
@@ -654,7 +673,7 @@ def _sub_period(self, other):
654673
return new_data
655674

656675
def _addsub_int_array(
657-
self, other: np.ndarray, op: Callable[[Any], Any],
676+
self, other: np.ndarray, op: Callable[[Any, Any], Any],
658677
) -> "PeriodArray":
659678
"""
660679
Add or subtract array of integers; equivalent to applying
@@ -682,7 +701,7 @@ def _add_offset(self, other):
682701
assert not isinstance(other, Tick)
683702
base = libfrequencies.get_base_alias(other.rule_code)
684703
if base != self.freq.rule_code:
685-
_raise_on_incompatible(self, other)
704+
raise raise_on_incompatible(self, other)
686705

687706
# Note: when calling parent class's _add_timedeltalike_scalar,
688707
# it will call delta_to_nanoseconds(delta). Because delta here
@@ -750,7 +769,7 @@ def _add_delta(self, other):
750769
"""
751770
if not isinstance(self.freq, Tick):
752771
# We cannot add timedelta-like to non-tick PeriodArray
753-
_raise_on_incompatible(self, other)
772+
raise raise_on_incompatible(self, other)
754773

755774
new_ordinals = super()._add_delta(other)
756775
return type(self)(new_ordinals, freq=self.freq)
@@ -802,28 +821,29 @@ def _check_timedeltalike_freq_compat(self, other):
802821
# by which will be added to self.
803822
return delta
804823

805-
_raise_on_incompatible(self, other)
824+
raise raise_on_incompatible(self, other)
806825

807826

808827
PeriodArray._add_comparison_ops()
809828

810829

811-
def _raise_on_incompatible(left, right):
830+
def raise_on_incompatible(left, right):
812831
"""
813832
Helper function to render a consistent error message when raising
814833
IncompatibleFrequency.
815834
816835
Parameters
817836
----------
818837
left : PeriodArray
819-
right : DateOffset, Period, ndarray, or timedelta-like
838+
right : None, DateOffset, Period, ndarray, or timedelta-like
820839
821-
Raises
840+
Returns
822841
-------
823842
IncompatibleFrequency
843+
Exception to be raised by the caller.
824844
"""
825845
# GH#24283 error message format depends on whether right is scalar
826-
if isinstance(right, np.ndarray):
846+
if isinstance(right, np.ndarray) or right is None:
827847
other_freq = None
828848
elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, DateOffset)):
829849
other_freq = right.freqstr
@@ -833,7 +853,7 @@ def _raise_on_incompatible(left, right):
833853
msg = DIFFERENT_FREQ.format(
834854
cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq
835855
)
836-
raise IncompatibleFrequency(msg)
856+
return IncompatibleFrequency(msg)
837857

838858

839859
# -------------------------------------------------------------------

pandas/core/arrays/string_.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -47,26 +47,15 @@ class StringDtype(ExtensionDtype):
4747
StringDtype
4848
"""
4949

50+
name = "string"
51+
5052
#: StringDtype.na_value uses pandas.NA
5153
na_value = libmissing.NA
5254

5355
@property
5456
def type(self) -> Type:
5557
return str
5658

57-
@property
58-
def name(self) -> str:
59-
"""
60-
The alias for StringDtype is ``'string'``.
61-
"""
62-
return "string"
63-
64-
@classmethod
65-
def construct_from_string(cls, string: str) -> ExtensionDtype:
66-
if string == "string":
67-
return cls()
68-
return super().construct_from_string(string)
69-
7059
@classmethod
7160
def construct_array_type(cls) -> "Type[StringArray]":
7261
return StringArray

0 commit comments

Comments
 (0)