
Commit 198280d

Merge branch 'master' of https://github.com/pandas-dev/pandas into ERR/imporve_error_message_cut/qcut

2 parents: a4596c8 + 4206fd4

45 files changed: +1157 / -796 lines. (Large commit: only a subset of the changed files is shown below.)

ci/deps/azure-36-locale.yaml

+1
@@ -9,6 +9,7 @@ dependencies:
 - cython>=0.29.13
 - pytest>=5.0.1
 - pytest-xdist>=1.21
+- pytest-asyncio
 - hypothesis>=3.58.0
 - pytest-azurepipelines

ci/deps/azure-37-locale.yaml

+1
@@ -8,6 +8,7 @@ dependencies:
 - cython>=0.29.13
 - pytest>=5.0.1
 - pytest-xdist>=1.21
+- pytest-asyncio
 - hypothesis>=3.58.0
 - pytest-azurepipelines

doc/source/whatsnew/v1.0.0.rst

+1
@@ -1016,6 +1016,7 @@ Other
 - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
 - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
 - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
+- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used to use invalid identifiers like names that start with a digit, are python keywords, or are using single character operators. (:issue:`27017`)
 - Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`)
 - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`)
 - Fix :class:`AbstractHolidayCalendar` to return correct results for
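The entry added above (:issue:`27017`) extends backtick quoting in DataFrame.query and DataFrame.eval to column names that are not valid Python identifiers. A brief usage sketch, not part of this commit's diff, assuming a pandas version that includes the change (1.0.0 or later):

    import pandas as pd

    # Column names that are not valid identifiers: one starts with a digit,
    # one is a Python keyword.
    df = pd.DataFrame({"1st": [1, 2, 3], "class": [4, 5, 6]})

    # Backtick quoting lets query()/eval() reference such columns.
    print(df.query("`1st` > 1 and `class` < 6"))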

environment.yml

+1
@@ -55,6 +55,7 @@ dependencies:
 - pytest>=5.0.1
 - pytest-cov
 - pytest-xdist>=1.21
+- pytest-asyncio

 # downstream tests
 - seaborn
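The three environment files above add pytest-asyncio, the pytest plugin that collects and runs tests defined with async def. As a hedged illustration of what the plugin enables (a generic example, not taken from this commit):

    import asyncio

    import pytest


    @pytest.mark.asyncio
    async def test_sleep_returns_none():
        # The marker tells pytest-asyncio to execute this coroutine in an event loop.
        assert await asyncio.sleep(0) is None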

pandas/core/arrays/period.py

+45 -25
@@ -29,6 +29,7 @@
     is_datetime64_dtype,
     is_float_dtype,
     is_list_like,
+    is_object_dtype,
     is_period_dtype,
     pandas_dtype,
 )
@@ -41,6 +42,7 @@
 )
 from pandas.core.dtypes.missing import isna, notna

+from pandas.core import ops
 import pandas.core.algorithms as algos
 from pandas.core.arrays import datetimelike as dtl
 import pandas.core.common as com
@@ -92,22 +94,44 @@ def wrapper(self, other):
             self._check_compatible_with(other)

             result = ordinal_op(other.ordinal)
-        elif isinstance(other, cls):
-            self._check_compatible_with(other)
-
-            result = ordinal_op(other.asi8)
-
-            mask = self._isnan | other._isnan
-            if mask.any():
-                result[mask] = nat_result

-            return result
         elif other is NaT:
             result = np.empty(len(self.asi8), dtype=bool)
             result.fill(nat_result)
-        else:
+
+        elif not is_list_like(other):
             return invalid_comparison(self, other, op)

+        else:
+            if isinstance(other, list):
+                # TODO: could use pd.Index to do inference?
+                other = np.array(other)
+
+            if not isinstance(other, (np.ndarray, cls)):
+                return invalid_comparison(self, other, op)
+
+            if is_object_dtype(other):
+                with np.errstate(all="ignore"):
+                    result = ops.comp_method_OBJECT_ARRAY(
+                        op, self.astype(object), other
+                    )
+                o_mask = isna(other)
+
+            elif not is_period_dtype(other):
+                # e.g. is_timedelta64_dtype(other)
+                return invalid_comparison(self, other, op)
+
+            else:
+                assert isinstance(other, cls), type(other)
+
+                self._check_compatible_with(other)
+
+                result = ordinal_op(other.asi8)
+                o_mask = other._isnan
+
+            if o_mask.any():
+                result[o_mask] = nat_result
+
         if self._hasnans:
             result[self._isnan] = nat_result
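The rewritten comparison branch above now accepts lists and object-dtype arrays by dispatching to ops.comp_method_OBJECT_ARRAY, where previously only another PeriodArray was handled. A hedged sketch of what this enables (illustrative only; assumes a pandas build that includes this change):

    import pandas as pd

    arr = pd.period_range("2000-01", periods=3, freq="M").array

    # A plain list of Periods (and NaT) is coerced to an object ndarray and
    # compared elementwise; NaT positions yield False for an equality check.
    other = [pd.Period("2000-01", freq="M"), pd.Period("2000-03", freq="M"), pd.NaT]
    print(arr == other)  # expected: [ True False False]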

@@ -215,12 +239,7 @@ def __init__(self, values, freq=None, dtype=None, copy=False):

         if isinstance(values, type(self)):
             if freq is not None and freq != values.freq:
-                msg = DIFFERENT_FREQ.format(
-                    cls=type(self).__name__,
-                    own_freq=values.freq.freqstr,
-                    other_freq=freq.freqstr,
-                )
-                raise IncompatibleFrequency(msg)
+                raise raise_on_incompatible(values, freq)
             values, freq = values._data, values.freq

         values = np.array(values, dtype="int64", copy=copy)
@@ -323,7 +342,7 @@ def _check_compatible_with(self, other):
         if other is NaT:
             return
         if self.freqstr != other.freqstr:
-            _raise_on_incompatible(self, other)
+            raise raise_on_incompatible(self, other)

     # --------------------------------------------------------------------
     # Data / Attributes
@@ -682,7 +701,7 @@ def _add_offset(self, other):
         assert not isinstance(other, Tick)
         base = libfrequencies.get_base_alias(other.rule_code)
         if base != self.freq.rule_code:
-            _raise_on_incompatible(self, other)
+            raise raise_on_incompatible(self, other)

         # Note: when calling parent class's _add_timedeltalike_scalar,
         # it will call delta_to_nanoseconds(delta). Because delta here
@@ -750,7 +769,7 @@ def _add_delta(self, other):
         """
         if not isinstance(self.freq, Tick):
             # We cannot add timedelta-like to non-tick PeriodArray
-            _raise_on_incompatible(self, other)
+            raise raise_on_incompatible(self, other)

         new_ordinals = super()._add_delta(other)
         return type(self)(new_ordinals, freq=self.freq)
@@ -802,28 +821,29 @@ def _check_timedeltalike_freq_compat(self, other):
             # by which will be added to self.
             return delta

-        _raise_on_incompatible(self, other)
+        raise raise_on_incompatible(self, other)


 PeriodArray._add_comparison_ops()


-def _raise_on_incompatible(left, right):
+def raise_on_incompatible(left, right):
     """
     Helper function to render a consistent error message when raising
     IncompatibleFrequency.

     Parameters
     ----------
     left : PeriodArray
-    right : DateOffset, Period, ndarray, or timedelta-like
+    right : None, DateOffset, Period, ndarray, or timedelta-like

-    Raises
+    Returns
     ------
     IncompatibleFrequency
+        Exception to be raised by the caller.
     """
     # GH#24283 error message format depends on whether right is scalar
-    if isinstance(right, np.ndarray):
+    if isinstance(right, np.ndarray) or right is None:
         other_freq = None
     elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, DateOffset)):
         other_freq = right.freqstr
@@ -833,7 +853,7 @@ def _raise_on_incompatible(left, right):
     msg = DIFFERENT_FREQ.format(
         cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq
     )
-    raise IncompatibleFrequency(msg)
+    return IncompatibleFrequency(msg)


 # -------------------------------------------------------------------
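The helper is renamed from _raise_on_incompatible to raise_on_incompatible and now returns the IncompatibleFrequency instance instead of raising it, so every call site becomes an explicit raise raise_on_incompatible(...), which likely makes the terminating control flow visible to readers and static checkers. A minimal, generic sketch of the build-and-return pattern (names are illustrative, not the pandas internals):

    class IncompatibleFrequency(ValueError):
        pass


    def incompatible_error(own_freq, other_freq):
        # Build the exception and hand it back; the caller decides to raise it.
        return IncompatibleFrequency(
            f"Input has different freq={other_freq} from expected freq={own_freq}"
        )


    def check_freq(own_freq, other_freq):
        if own_freq != other_freq:
            raise incompatible_error(own_freq, other_freq)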

pandas/core/computation/common.py

-14
@@ -4,9 +4,6 @@

 from pandas._config import get_option

-# A token value Python's tokenizer probably will never use.
-_BACKTICK_QUOTED_STRING = 100
-

 def _ensure_decoded(s):
     """
@@ -29,16 +26,5 @@ def result_type_many(*arrays_and_dtypes):
         return reduce(np.result_type, arrays_and_dtypes)


-def _remove_spaces_column_name(name):
-    """
-    Check if name contains any spaces, if it contains any spaces
-    the spaces will be removed and an underscore suffix is added.
-    """
-    if not isinstance(name, str) or " " not in name:
-        return name
-
-    return name.replace(" ", "_") + "_BACKTICK_QUOTED_STRING"
-
-
 class NameResolutionError(NameError):
     pass

pandas/core/computation/eval.py

+2 -1
@@ -12,7 +12,8 @@
 from pandas.util._validators import validate_bool_kwarg

 from pandas.core.computation.engines import _engines
-from pandas.core.computation.expr import Expr, _parsers, tokenize_string
+from pandas.core.computation.expr import Expr, _parsers
+from pandas.core.computation.parsing import tokenize_string
 from pandas.core.computation.scope import ensure_scope

 from pandas.io.formats.printing import pprint_thing

pandas/core/computation/expr.py

+4 -68
@@ -3,19 +3,13 @@

 import ast
 from functools import partial, reduce
-from io import StringIO
-import itertools as it
-import operator
+from keyword import iskeyword
 import tokenize
 from typing import Optional, Type

 import numpy as np

 import pandas.core.common as com
-from pandas.core.computation.common import (
-    _BACKTICK_QUOTED_STRING,
-    _remove_spaces_column_name,
-)
 from pandas.core.computation.ops import (
     _LOCAL_TAG,
     BinOp,
@@ -34,38 +28,12 @@
     _unary_ops_syms,
     is_term,
 )
+from pandas.core.computation.parsing import clean_backtick_quoted_toks, tokenize_string
 from pandas.core.computation.scope import Scope

 import pandas.io.formats.printing as printing


-def tokenize_string(source: str):
-    """
-    Tokenize a Python source code string.
-
-    Parameters
-    ----------
-    source : str
-        A Python source code string
-    """
-    line_reader = StringIO(source).readline
-    token_generator = tokenize.generate_tokens(line_reader)
-
-    # Loop over all tokens till a backtick (`) is found.
-    # Then, take all tokens till the next backtick to form a backtick quoted
-    # string.
-    for toknum, tokval, _, _, _ in token_generator:
-        if tokval == "`":
-            tokval = " ".join(
-                it.takewhile(
-                    lambda tokval: tokval != "`",
-                    map(operator.itemgetter(1), token_generator),
-                )
-            )
-            toknum = _BACKTICK_QUOTED_STRING
-        yield toknum, tokval
-
-
 def _rewrite_assign(tok):
     """Rewrite the assignment operator for PyTables expressions that use ``=``
     as a substitute for ``==``.
@@ -133,31 +101,6 @@ def _replace_locals(tok):
     return toknum, tokval


-def _clean_spaces_backtick_quoted_names(tok):
-    """Clean up a column name if surrounded by backticks.
-
-    Backtick quoted string are indicated by a certain tokval value. If a string
-    is a backtick quoted token it will processed by
-    :func:`_remove_spaces_column_name` so that the parser can find this
-    string when the query is executed.
-    See also :meth:`NDFrame._get_space_character_free_column_resolver`.
-
-    Parameters
-    ----------
-    tok : tuple of int, str
-        ints correspond to the all caps constants in the tokenize module
-
-    Returns
-    -------
-    t : tuple of int, str
-        Either the input or token or the replacement values
-    """
-    toknum, tokval = tok
-    if toknum == _BACKTICK_QUOTED_STRING:
-        return tokenize.NAME, _remove_spaces_column_name(tokval)
-    return toknum, tokval
-
-
 def _compose2(f, g):
     """Compose 2 callables"""
     return lambda *args, **kwargs: f(g(*args, **kwargs))
@@ -172,10 +115,7 @@ def _compose(*funcs):
 def _preparse(
     source: str,
     f=_compose(
-        _replace_locals,
-        _replace_booleans,
-        _rewrite_assign,
-        _clean_spaces_backtick_quoted_names,
+        _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks
     ),
 ):
     """Compose a collection of tokenization functions
@@ -426,8 +366,6 @@ def visit(self, node, **kwargs):
         try:
             node = ast.fix_missing_locations(ast.parse(clean))
         except SyntaxError as e:
-            from keyword import iskeyword
-
             if any(iskeyword(x) for x in clean.split()):
                 e.msg = "Python keyword not valid identifier in numexpr query"
             raise e
@@ -781,9 +719,7 @@ def __init__(
         parser,
         preparser=partial(
            _preparse,
-            f=_compose(
-                _replace_locals, _replace_booleans, _clean_spaces_backtick_quoted_names
-            ),
+            f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks),
         ),
     ):
         super().__init__(env, engine, parser, preparser)
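As the hunks above show, _preparse builds its token callback by composing several single-token transformers with _compose, now including clean_backtick_quoted_toks from pandas.core.computation.parsing. A small self-contained sketch of that composition technique (generic transformer names, not the pandas internals):

    from functools import reduce
    import tokenize


    def compose2(f, g):
        """Compose two callables: compose2(f, g)(x) == f(g(x))."""
        return lambda *args, **kwargs: f(g(*args, **kwargs))


    def compose(*funcs):
        """Compose any number of callables, applied right to left."""
        assert len(funcs) > 1, "At least two callables must be composed"
        return reduce(compose2, funcs)


    def upper_names(tok):
        # Token transformers take and return a (toknum, tokval) pair.
        toknum, tokval = tok
        if toknum == tokenize.NAME:
            return toknum, tokval.upper()
        return toknum, tokval


    def rewrite_single_eq(tok):
        toknum, tokval = tok
        if toknum == tokenize.OP and tokval == "=":
            return toknum, "=="
        return toknum, tokval


    # Applied right to left, as in _compose(_replace_locals, ..., clean_backtick_quoted_toks).
    transform = compose(upper_names, rewrite_single_eq)
    print(transform((tokenize.OP, "=")))    # -> (tokenize.OP, "==")
    print(transform((tokenize.NAME, "x")))  # -> (tokenize.NAME, "X")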
