Skip to content

Commit 397cad2

Browse files
jbrockmendel authored and proost committed
CLN: assorted cleanups, remove unicode checks in cython (pandas-dev#28879)
1 parent 3286f2c commit 397cad2

12 files changed

+35
-32
lines changed

pandas/_libs/hashing.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
6060
val = arr[i]
6161
if isinstance(val, bytes):
6262
data = <bytes>val
63-
elif isinstance(val, unicode):
63+
elif isinstance(val, str):
6464
data = <bytes>val.encode(encoding)
6565
elif val is None or is_nan(val):
6666
# null, stringify and encode

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line number | Diff line number | Diff line change
@@ -667,7 +667,7 @@ cdef class StringHashTable(HashTable):
667667
for i in range(n):
668668
val = values[i]
669669

670-
if isinstance(val, (str, unicode)):
670+
if isinstance(val, str):
671671
v = get_c_string(val)
672672
else:
673673
v = get_c_string(self.na_string_sentinel)
@@ -700,7 +700,7 @@ cdef class StringHashTable(HashTable):
700700
for i in range(n):
701701
val = values[i]
702702

703-
if isinstance(val, (str, unicode)):
703+
if isinstance(val, str):
704704
v = get_c_string(val)
705705
else:
706706
v = get_c_string(self.na_string_sentinel)
@@ -774,7 +774,7 @@ cdef class StringHashTable(HashTable):
774774
val = values[i]
775775

776776
if (ignore_na
777-
and (not isinstance(val, (str, unicode))
777+
and (not isinstance(val, str)
778778
or (use_na_value and val == na_value))):
779779
# if missing values do not count as unique values (i.e. if
780780
# ignore_na is True), we can skip the actual value, and

pandas/_libs/parsers.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -2249,7 +2249,7 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col,
22492249
def _maybe_encode(values):
22502250
if values is None:
22512251
return []
2252-
return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values]
2252+
return [x.encode('utf-8') if isinstance(x, str) else x for x in values]
22532253

22542254

22552255
def sanitize_objects(ndarray[object] values, set na_values,

pandas/_libs/tslibs/fields.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ from pandas._libs.tslibs.np_datetime cimport (
2222
from pandas._libs.tslibs.nattype cimport NPY_NAT
2323

2424

25-
def get_time_micros(ndarray[int64_t] dtindex):
25+
def get_time_micros(const int64_t[:] dtindex):
2626
"""
2727
Return the number of microseconds in the time component of a
2828
nanosecond timestamp.
@@ -537,7 +537,7 @@ def get_date_field(const int64_t[:] dtindex, object field):
537537
elif field == 'is_leap_year':
538538
return isleapyear_arr(get_date_field(dtindex, 'Y'))
539539

540-
raise ValueError("Field %s not supported" % field)
540+
raise ValueError("Field {field} not supported".format(field=field))
541541

542542

543543
@cython.wraparound(False)

pandas/_libs/tslibs/parsing.pyx

+3-5
Original file line number | Diff line number | Diff line change
@@ -252,9 +252,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
252252
-------
253253
datetime, datetime/dateutil.parser._result, str
254254
"""
255-
if not isinstance(arg, (str, unicode)):
256-
# Note: cython recognizes `unicode` in both py2/py3, optimizes
257-
# this check into a C call.
255+
if not isinstance(arg, str):
258256
return arg
259257

260258
if getattr(freq, "_typ", None) == "dateoffset":
@@ -370,7 +368,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
370368
int year, quarter = -1, month, mnum, date_len
371369

372370
# special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
373-
assert isinstance(date_string, (str, unicode))
371+
assert isinstance(date_string, str)
374372

375373
# len(date_string) == 0
376374
# should be NaT???
@@ -517,7 +515,7 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
517515
tzdata = tzinfos.get(res.tzname)
518516
if isinstance(tzdata, datetime.tzinfo):
519517
tzinfo = tzdata
520-
elif isinstance(tzdata, (str, unicode)):
518+
elif isinstance(tzdata, str):
521519
tzinfo = _dateutil_tzstr(tzdata)
522520
elif isinstance(tzdata, int):
523521
tzinfo = tzoffset(res.tzname, tzdata)

pandas/_libs/tslibs/period.pyx

+4-1
Original file line number | Diff line number | Diff line change
@@ -2448,7 +2448,10 @@ class Period(_Period):
24482448
converted = other.asfreq(freq)
24492449
ordinal = converted.ordinal
24502450

2451-
elif is_null_datetimelike(value) or value in nat_strings:
2451+
elif is_null_datetimelike(value) or (isinstance(value, str) and
2452+
value in nat_strings):
2453+
# explicit str check is necessary to avoid raising incorrectly
2454+
# if we have a non-hashable value.
24522455
ordinal = NPY_NAT
24532456

24542457
elif isinstance(value, str) or util.is_integer_object(value):

pandas/core/arrays/datetimelike.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1148,7 +1148,7 @@ def _addsub_offset_array(self, other, op):
11481148
)
11491149

11501150
# For EA self.astype('O') returns a numpy array, not an Index
1151-
left = lib.values_from_object(self.astype("O"))
1151+
left = self.astype("O")
11521152

11531153
res_values = op(left, np.array(other))
11541154
kwargs = {}

pandas/core/arrays/period.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ def _period_array_cmp(cls, op):
7070
nat_result = opname == "__ne__"
7171

7272
def wrapper(self, other):
73-
op = getattr(self.asi8, opname)
73+
ordinal_op = getattr(self.asi8, opname)
7474

7575
other = lib.item_from_zerodim(other)
7676
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
@@ -82,11 +82,11 @@ def wrapper(self, other):
8282
if isinstance(other, Period):
8383
self._check_compatible_with(other)
8484

85-
result = op(other.ordinal)
85+
result = ordinal_op(other.ordinal)
8686
elif isinstance(other, cls):
8787
self._check_compatible_with(other)
8888

89-
result = op(other.asi8)
89+
result = ordinal_op(other.asi8)
9090

9191
mask = self._isnan | other._isnan
9292
if mask.any():
@@ -98,7 +98,7 @@ def wrapper(self, other):
9898
result.fill(nat_result)
9999
else:
100100
other = Period(other, freq=self.freq)
101-
result = op(other.ordinal)
101+
result = ordinal_op(other.ordinal)
102102

103103
if self._hasnans:
104104
result[self._isnan] = nat_result

pandas/core/arrays/timedeltas.py

+11-11
Original file line number | Diff line number | Diff line change
@@ -553,7 +553,7 @@ def __mul__(self, other):
553553
# for that instead of ValueError
554554
raise ValueError("Cannot multiply with unequal lengths")
555555

556-
if is_object_dtype(other):
556+
if is_object_dtype(other.dtype):
557557
# this multiplication will succeed only if all elements of other
558558
# are int or float scalars, so we will end up with
559559
# timedelta64[ns]-dtyped result
@@ -601,11 +601,11 @@ def __truediv__(self, other):
601601
if len(other) != len(self):
602602
raise ValueError("Cannot divide vectors with unequal lengths")
603603

604-
elif is_timedelta64_dtype(other):
604+
elif is_timedelta64_dtype(other.dtype):
605605
# let numpy handle it
606606
return self._data / other
607607

608-
elif is_object_dtype(other):
608+
elif is_object_dtype(other.dtype):
609609
# Note: we do not do type inference on the result, so either
610610
# an object array or numeric-dtyped (if numpy does inference)
611611
# will be returned. GH#23829
@@ -649,12 +649,12 @@ def __rtruediv__(self, other):
649649
if len(other) != len(self):
650650
raise ValueError("Cannot divide vectors with unequal lengths")
651651

652-
elif is_timedelta64_dtype(other):
652+
elif is_timedelta64_dtype(other.dtype):
653653
# let numpy handle it
654654
return other / self._data
655655

656-
elif is_object_dtype(other):
657-
# Note: unlike in __truediv__, we do not _need_ to do type#
656+
elif is_object_dtype(other.dtype):
657+
# Note: unlike in __truediv__, we do not _need_ to do type
658658
# inference on the result. It does not raise, a numeric array
659659
# is returned. GH#23829
660660
result = [other[n] / self[n] for n in range(len(self))]
@@ -701,7 +701,7 @@ def __floordiv__(self, other):
701701
if len(other) != len(self):
702702
raise ValueError("Cannot divide with unequal lengths")
703703

704-
elif is_timedelta64_dtype(other):
704+
elif is_timedelta64_dtype(other.dtype):
705705
other = type(self)(other)
706706

707707
# numpy timedelta64 does not natively support floordiv, so operate
@@ -713,15 +713,15 @@ def __floordiv__(self, other):
713713
result[mask] = np.nan
714714
return result
715715

716-
elif is_object_dtype(other):
716+
elif is_object_dtype(other.dtype):
717717
result = [self[n] // other[n] for n in range(len(self))]
718718
result = np.array(result)
719719
if lib.infer_dtype(result, skipna=False) == "timedelta":
720720
result, _ = sequence_to_td64ns(result)
721721
return type(self)(result)
722722
return result
723723

724-
elif is_integer_dtype(other) or is_float_dtype(other):
724+
elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):
725725
result = self._data // other
726726
return type(self)(result)
727727

@@ -763,7 +763,7 @@ def __rfloordiv__(self, other):
763763
if len(other) != len(self):
764764
raise ValueError("Cannot divide with unequal lengths")
765765

766-
elif is_timedelta64_dtype(other):
766+
elif is_timedelta64_dtype(other.dtype):
767767
other = type(self)(other)
768768

769769
# numpy timedelta64 does not natively support floordiv, so operate
@@ -775,7 +775,7 @@ def __rfloordiv__(self, other):
775775
result[mask] = np.nan
776776
return result
777777

778-
elif is_object_dtype(other):
778+
elif is_object_dtype(other.dtype):
779779
result = [other[n] // self[n] for n in range(len(self))]
780780
result = np.array(result)
781781
return result

pandas/core/ops/array_ops.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def arithmetic_op(
161161
right: Any,
162162
op,
163163
str_rep: str,
164-
eval_kwargs: Dict[str, str],
164+
eval_kwargs: Dict[str, bool],
165165
):
166166
"""
167167
Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

pandas/tests/frame/test_operators.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -400,7 +400,7 @@ def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
400400
added = float_frame + mixed_int_frame
401401
_check_mixed_float(added, dtype="float64")
402402

403-
def test_combineSeries(
403+
def test_combine_series(
404404
self, float_frame, mixed_float_frame, mixed_int_frame, datetime_frame
405405
):
406406

@@ -432,6 +432,7 @@ def test_combineSeries(
432432
added = mixed_float_frame + series.astype("float16")
433433
_check_mixed_float(added, dtype=dict(C=None))
434434

435+
# FIXME: don't leave commented-out
435436
# these raise with numexpr.....as we are adding an int64 to an
436437
# uint64....weird vs int
437438

pandas/tests/frame/test_query_eval.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,8 @@ def test_ops(self):
122122
result = getattr(df, rop)(m)
123123
assert_frame_equal(result, expected)
124124

125-
# GH7192
125+
# GH7192: Note we need a large number of rows to ensure this
126+
# goes through the numexpr path
126127
df = DataFrame(dict(A=np.random.randn(25000)))
127128
df.iloc[0:5] = np.nan
128129
expected = 1 - np.isnan(df.iloc[0:25])

0 commit comments

Comments
 (0)