diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
index c0aa661266d29..6b27b2204e75e 100644
--- a/pandas/_libs/hashing.pyx
+++ b/pandas/_libs/hashing.pyx
@@ -60,7 +60,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
         val = arr[i]
         if isinstance(val, bytes):
             data = val
-        elif isinstance(val, unicode):
+        elif isinstance(val, str):
             data = val.encode(encoding)
         elif val is None or is_nan(val):
             # null, stringify and encode
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 17f1d011af01b..1cbdb0df6233c 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -667,7 +667,7 @@ cdef class StringHashTable(HashTable):
         for i in range(n):
             val = values[i]

-            if isinstance(val, (str, unicode)):
+            if isinstance(val, str):
                 v = get_c_string(val)
             else:
                 v = get_c_string(self.na_string_sentinel)
@@ -700,7 +700,7 @@ cdef class StringHashTable(HashTable):
         for i in range(n):
             val = values[i]

-            if isinstance(val, (str, unicode)):
+            if isinstance(val, str):
                 v = get_c_string(val)
             else:
                 v = get_c_string(self.na_string_sentinel)
@@ -774,7 +774,7 @@ cdef class StringHashTable(HashTable):
             val = values[i]

             if (ignore_na
-                    and (not isinstance(val, (str, unicode))
+                    and (not isinstance(val, str)
                          or (use_na_value and val == na_value))):
                 # if missing values do not count as unique values (i.e. if
                 # ignore_na is True), we can skip the actual value, and
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index f5a42d7aef3ba..3f12ec4c15fc7 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -2249,7 +2249,7 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col,
 def _maybe_encode(values):
     if values is None:
         return []
-    return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values]
+    return [x.encode('utf-8') if isinstance(x, str) else x for x in values]


 def sanitize_objects(ndarray[object] values, set na_values,
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
index 2ed85595f7e3a..8f5c8d10776df 100644
--- a/pandas/_libs/tslibs/fields.pyx
+++ b/pandas/_libs/tslibs/fields.pyx
@@ -22,7 +22,7 @@ from pandas._libs.tslibs.np_datetime cimport (
 from pandas._libs.tslibs.nattype cimport NPY_NAT


-def get_time_micros(ndarray[int64_t] dtindex):
+def get_time_micros(const int64_t[:] dtindex):
     """
     Return the number of microseconds in the time component of a
     nanosecond timestamp.
@@ -537,7 +537,7 @@ def get_date_field(const int64_t[:] dtindex, object field):
     elif field == 'is_leap_year':
         return isleapyear_arr(get_date_field(dtindex, 'Y'))

-    raise ValueError("Field %s not supported" % field)
+    raise ValueError("Field {field} not supported".format(field=field))


 @cython.wraparound(False)
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index ca70c8af45f2f..33665484311ba 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -252,9 +252,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
     -------
     datetime, datetime/dateutil.parser._result, str
     """
-    if not isinstance(arg, (str, unicode)):
-        # Note: cython recognizes `unicode` in both py2/py3, optimizes
-        # this check into a C call.
+    if not isinstance(arg, str):
         return arg

     if getattr(freq, "_typ", None) == "dateoffset":
@@ -370,7 +368,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
         int year, quarter = -1, month, mnum, date_len

     # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
-    assert isinstance(date_string, (str, unicode))
+    assert isinstance(date_string, str)

     # len(date_string) == 0
     # should be NaT???
@@ -517,7 +515,7 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
                 tzdata = tzinfos.get(res.tzname)
             if isinstance(tzdata, datetime.tzinfo):
                 tzinfo = tzdata
-            elif isinstance(tzdata, (str, unicode)):
+            elif isinstance(tzdata, str):
                 tzinfo = _dateutil_tzstr(tzdata)
             elif isinstance(tzdata, int):
                 tzinfo = tzoffset(res.tzname, tzdata)
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index 32dcc86faa7e8..84a41b8757001 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -2448,7 +2448,10 @@ class Period(_Period):
             converted = other.asfreq(freq)
             ordinal = converted.ordinal

-        elif is_null_datetimelike(value) or value in nat_strings:
+        elif is_null_datetimelike(value) or (isinstance(value, str) and
+                                             value in nat_strings):
+            # explicit str check is necessary to avoid raising incorrectly
+            # if we have a non-hashable value.
             ordinal = NPY_NAT

         elif isinstance(value, str) or util.is_integer_object(value):
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index bda5f8f4326f1..958650e3842fa 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1148,7 +1148,7 @@ def _addsub_offset_array(self, other, op):
             )

         # For EA self.astype('O') returns a numpy array, not an Index
-        left = lib.values_from_object(self.astype("O"))
+        left = self.astype("O")

         res_values = op(left, np.array(other))
         kwargs = {}
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index f2d74794eadf5..43208d98abd3c 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -70,7 +70,7 @@ def _period_array_cmp(cls, op):
     nat_result = opname == "__ne__"

     def wrapper(self, other):
-        op = getattr(self.asi8, opname)
+        ordinal_op = getattr(self.asi8, opname)
         other = lib.item_from_zerodim(other)

         if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
@@ -82,11 +82,11 @@ def wrapper(self, other):
         if isinstance(other, Period):
             self._check_compatible_with(other)

-            result = op(other.ordinal)
+            result = ordinal_op(other.ordinal)
         elif isinstance(other, cls):
             self._check_compatible_with(other)

-            result = op(other.asi8)
+            result = ordinal_op(other.asi8)

             mask = self._isnan | other._isnan
             if mask.any():
@@ -98,7 +98,7 @@ def wrapper(self, other):
             result.fill(nat_result)
         else:
             other = Period(other, freq=self.freq)
-            result = op(other.ordinal)
+            result = ordinal_op(other.ordinal)

         if self._hasnans:
             result[self._isnan] = nat_result
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 6c9462ff4fa4d..21e07b5101a64 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -553,7 +553,7 @@ def __mul__(self, other):
             # for that instead of ValueError
             raise ValueError("Cannot multiply with unequal lengths")

-        if is_object_dtype(other):
+        if is_object_dtype(other.dtype):
            # this multiplication will succeed only if all elements of other
            # are int or float scalars, so we will end up with
            # timedelta64[ns]-dtyped result
@@ -601,11 +601,11 @@ def __truediv__(self, other):
        if len(other) != len(self):
            raise ValueError("Cannot divide vectors with unequal lengths")

-        elif is_timedelta64_dtype(other):
+        elif is_timedelta64_dtype(other.dtype):
            # let numpy handle it
            return self._data / other

-        elif is_object_dtype(other):
+        elif is_object_dtype(other.dtype):
            # Note: we do not do type inference on the result, so either
            # an object array or numeric-dtyped (if numpy does inference)
            # will be returned. GH#23829
@@ -649,12 +649,12 @@ def __rtruediv__(self, other):
        if len(other) != len(self):
            raise ValueError("Cannot divide vectors with unequal lengths")

-        elif is_timedelta64_dtype(other):
+        elif is_timedelta64_dtype(other.dtype):
            # let numpy handle it
            return other / self._data

-        elif is_object_dtype(other):
-            # Note: unlike in __truediv__, we do not _need_ to do type#
+        elif is_object_dtype(other.dtype):
+            # Note: unlike in __truediv__, we do not _need_ to do type
            # inference on the result. It does not raise, a numeric array
            # is returned. GH#23829
            result = [other[n] / self[n] for n in range(len(self))]
@@ -701,7 +701,7 @@ def __floordiv__(self, other):
        if len(other) != len(self):
            raise ValueError("Cannot divide with unequal lengths")

-        elif is_timedelta64_dtype(other):
+        elif is_timedelta64_dtype(other.dtype):
            other = type(self)(other)

            # numpy timedelta64 does not natively support floordiv, so operate
@@ -713,7 +713,7 @@ def __floordiv__(self, other):
                result[mask] = np.nan
            return result

-        elif is_object_dtype(other):
+        elif is_object_dtype(other.dtype):
            result = [self[n] // other[n] for n in range(len(self))]
            result = np.array(result)
            if lib.infer_dtype(result, skipna=False) == "timedelta":
@@ -721,7 +721,7 @@ def __floordiv__(self, other):
                return type(self)(result)
            return result

-        elif is_integer_dtype(other) or is_float_dtype(other):
+        elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):
            result = self._data // other
            return type(self)(result)

@@ -763,7 +763,7 @@ def __rfloordiv__(self, other):
        if len(other) != len(self):
            raise ValueError("Cannot divide with unequal lengths")

-        elif is_timedelta64_dtype(other):
+        elif is_timedelta64_dtype(other.dtype):
            other = type(self)(other)

            # numpy timedelta64 does not natively support floordiv, so operate
@@ -775,7 +775,7 @@ def __rfloordiv__(self, other):
                result[mask] = np.nan
            return result

-        elif is_object_dtype(other):
+        elif is_object_dtype(other.dtype):
            result = [other[n] // self[n] for n in range(len(self))]
            result = np.array(result)
            return result
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index a225eec93b27e..8c9a4b94446c0 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -161,7 +161,7 @@ def arithmetic_op(
     right: Any,
     op,
     str_rep: str,
-    eval_kwargs: Dict[str, str],
+    eval_kwargs: Dict[str, bool],
 ):
     """
     Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...
diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
index 0e1cd42329169..73eddf91325ae 100644
--- a/pandas/tests/frame/test_operators.py
+++ b/pandas/tests/frame/test_operators.py
@@ -400,7 +400,7 @@ def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
         added = float_frame + mixed_int_frame
         _check_mixed_float(added, dtype="float64")

-    def test_combineSeries(
+    def test_combine_series(
         self, float_frame, mixed_float_frame, mixed_int_frame, datetime_frame
     ):

@@ -432,6 +432,7 @@ def test_combineSeries(
         added = mixed_float_frame + series.astype("float16")
         _check_mixed_float(added, dtype=dict(C=None))

+        # FIXME: don't leave commented-out
         # these raise with numexpr.....as we are adding an int64 to an
         # uint64....weird vs int

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 82c197ac054f0..f5f6c9ad6b3da 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -122,7 +122,8 @@ def test_ops(self):
             result = getattr(df, rop)(m)
             assert_frame_equal(result, expected)

-        # GH7192
+        # GH7192: Note we need a large number of rows to ensure this
+        # goes through the numexpr path
         df = DataFrame(dict(A=np.random.randn(25000)))
         df.iloc[0:5] = np.nan
         expected = 1 - np.isnan(df.iloc[0:25])