Skip to content

Commit 537c458

Browse files
Merge remote-tracking branch 'upstream/master' into typing
2 parents c00967c + 9a3e1ef commit 537c458

26 files changed

+217
-139
lines changed

doc/source/development/contributing.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ installed (or you wish to install a newer version) you can install a compiler
172172
yum groupinstall "Development Tools"
173173

174174
For other Linux distributions, consult your favourite search engine for
175-
commpiler installation instructions.
175+
compiler installation instructions.
176176

177177
Let us know if you have any difficulties by opening an issue or reaching out on
178178
`Gitter`_.

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ I/O
311311
- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
312312
- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`)
313313
- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`)
314+
- Bug in :func:`read_hdf` closing stores that it didn't open when exceptions are raised (:issue:`28699`)
315+
- Bug in :func:`read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`)
314316
- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`)
315317

316318
Plotting

pandas/_libs/hashing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
6060
val = arr[i]
6161
if isinstance(val, bytes):
6262
data = <bytes>val
63-
elif isinstance(val, unicode):
63+
elif isinstance(val, str):
6464
data = <bytes>val.encode(encoding)
6565
elif val is None or is_nan(val):
6666
# null, stringify and encode

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,7 @@ cdef class StringHashTable(HashTable):
667667
for i in range(n):
668668
val = values[i]
669669

670-
if isinstance(val, (str, unicode)):
670+
if isinstance(val, str):
671671
v = get_c_string(val)
672672
else:
673673
v = get_c_string(self.na_string_sentinel)
@@ -700,7 +700,7 @@ cdef class StringHashTable(HashTable):
700700
for i in range(n):
701701
val = values[i]
702702

703-
if isinstance(val, (str, unicode)):
703+
if isinstance(val, str):
704704
v = get_c_string(val)
705705
else:
706706
v = get_c_string(self.na_string_sentinel)
@@ -774,7 +774,7 @@ cdef class StringHashTable(HashTable):
774774
val = values[i]
775775

776776
if (ignore_na
777-
and (not isinstance(val, (str, unicode))
777+
and (not isinstance(val, str)
778778
or (use_na_value and val == na_value))):
779779
# if missing values do not count as unique values (i.e. if
780780
# ignore_na is True), we can skip the actual value, and

pandas/_libs/parsers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -2249,7 +2249,7 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col,
22492249
def _maybe_encode(values):
22502250
if values is None:
22512251
return []
2252-
return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values]
2252+
return [x.encode('utf-8') if isinstance(x, str) else x for x in values]
22532253

22542254

22552255
def sanitize_objects(ndarray[object] values, set na_values,

pandas/_libs/tslibs/fields.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ from pandas._libs.tslibs.np_datetime cimport (
2222
from pandas._libs.tslibs.nattype cimport NPY_NAT
2323

2424

25-
def get_time_micros(ndarray[int64_t] dtindex):
25+
def get_time_micros(const int64_t[:] dtindex):
2626
"""
2727
Return the number of microseconds in the time component of a
2828
nanosecond timestamp.
@@ -537,7 +537,7 @@ def get_date_field(const int64_t[:] dtindex, object field):
537537
elif field == 'is_leap_year':
538538
return isleapyear_arr(get_date_field(dtindex, 'Y'))
539539

540-
raise ValueError("Field %s not supported" % field)
540+
raise ValueError("Field {field} not supported".format(field=field))
541541

542542

543543
@cython.wraparound(False)

pandas/_libs/tslibs/parsing.pyx

+3-5
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
252252
-------
253253
datetime, datetime/dateutil.parser._result, str
254254
"""
255-
if not isinstance(arg, (str, unicode)):
256-
# Note: cython recognizes `unicode` in both py2/py3, optimizes
257-
# this check into a C call.
255+
if not isinstance(arg, str):
258256
return arg
259257

260258
if getattr(freq, "_typ", None) == "dateoffset":
@@ -370,7 +368,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
370368
int year, quarter = -1, month, mnum, date_len
371369

372370
# special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
373-
assert isinstance(date_string, (str, unicode))
371+
assert isinstance(date_string, str)
374372

375373
# len(date_string) == 0
376374
# should be NaT???
@@ -517,7 +515,7 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
517515
tzdata = tzinfos.get(res.tzname)
518516
if isinstance(tzdata, datetime.tzinfo):
519517
tzinfo = tzdata
520-
elif isinstance(tzdata, (str, unicode)):
518+
elif isinstance(tzdata, str):
521519
tzinfo = _dateutil_tzstr(tzdata)
522520
elif isinstance(tzdata, int):
523521
tzinfo = tzoffset(res.tzname, tzdata)

pandas/_libs/tslibs/period.pyx

+7-4
Original file line numberDiff line numberDiff line change
@@ -1710,7 +1710,7 @@ cdef class _Period:
17101710
def asfreq(self, freq, how='E'):
17111711
"""
17121712
Convert Period to desired frequency, either at the start or end of the
1713-
interval
1713+
interval.
17141714
17151715
Parameters
17161716
----------
@@ -1777,7 +1777,7 @@ cdef class _Period:
17771777
def to_timestamp(self, freq=None, how='start', tz=None):
17781778
"""
17791779
Return the Timestamp representation of the Period at the target
1780-
frequency at the specified end (how) of the Period
1780+
frequency at the specified end (how) of the Period.
17811781
17821782
Parameters
17831783
----------
@@ -2380,7 +2380,7 @@ cdef class _Period:
23802380

23812381
class Period(_Period):
23822382
"""
2383-
Represents a period of time
2383+
Represents a period of time.
23842384
23852385
Parameters
23862386
----------
@@ -2448,7 +2448,10 @@ class Period(_Period):
24482448
converted = other.asfreq(freq)
24492449
ordinal = converted.ordinal
24502450

2451-
elif is_null_datetimelike(value) or value in nat_strings:
2451+
elif is_null_datetimelike(value) or (isinstance(value, str) and
2452+
value in nat_strings):
2453+
# explicit str check is necessary to avoid raising incorrectly
2454+
# if we have a non-hashable value.
24522455
ordinal = NPY_NAT
24532456

24542457
elif isinstance(value, str) or util.is_integer_object(value):

pandas/_libs/tslibs/timedeltas.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -1344,7 +1344,7 @@ class Timedelta(_Timedelta):
13441344

13451345
def floor(self, freq):
13461346
"""
1347-
return a new Timedelta floored to this resolution.
1347+
Return a new Timedelta floored to this resolution.
13481348
13491349
Parameters
13501350
----------
@@ -1355,7 +1355,7 @@ class Timedelta(_Timedelta):
13551355

13561356
def ceil(self, freq):
13571357
"""
1358-
return a new Timedelta ceiled to this resolution.
1358+
Return a new Timedelta ceiled to this resolution.
13591359
13601360
Parameters
13611361
----------

pandas/core/arrays/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1147,7 +1147,7 @@ def _addsub_offset_array(self, other, op):
11471147
)
11481148

11491149
# For EA self.astype('O') returns a numpy array, not an Index
1150-
left = lib.values_from_object(self.astype("O"))
1150+
left = self.astype("O")
11511151

11521152
res_values = op(left, np.array(other))
11531153
kwargs = {}

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ def length(self):
979979
@property
980980
def mid(self):
981981
"""
982-
Return the midpoint of each Interval in the IntervalArray as an Index
982+
Return the midpoint of each Interval in the IntervalArray as an Index.
983983
"""
984984
try:
985985
return 0.5 * (self.left + self.right)

pandas/core/arrays/period.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def _period_array_cmp(cls, op):
7272
nat_result = opname == "__ne__"
7373

7474
def wrapper(self, other):
75-
op = getattr(self.asi8, opname)
75+
ordinal_op = getattr(self.asi8, opname)
7676

7777
other = lib.item_from_zerodim(other)
7878
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
@@ -84,11 +84,11 @@ def wrapper(self, other):
8484
if isinstance(other, Period):
8585
self._check_compatible_with(other)
8686

87-
result = op(other.ordinal)
87+
result = ordinal_op(other.ordinal)
8888
elif isinstance(other, cls):
8989
self._check_compatible_with(other)
9090

91-
result = op(other.asi8)
91+
result = ordinal_op(other.asi8)
9292

9393
mask = self._isnan | other._isnan
9494
if mask.any():
@@ -100,7 +100,7 @@ def wrapper(self, other):
100100
result.fill(nat_result)
101101
else:
102102
other = Period(other, freq=self.freq)
103-
result = op(other.ordinal)
103+
result = ordinal_op(other.ordinal)
104104

105105
if self._hasnans:
106106
result[self._isnan] = nat_result

pandas/core/arrays/timedeltas.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ def __mul__(self, other):
553553
# for that instead of ValueError
554554
raise ValueError("Cannot multiply with unequal lengths")
555555

556-
if is_object_dtype(other):
556+
if is_object_dtype(other.dtype):
557557
# this multiplication will succeed only if all elements of other
558558
# are int or float scalars, so we will end up with
559559
# timedelta64[ns]-dtyped result
@@ -601,11 +601,11 @@ def __truediv__(self, other):
601601
if len(other) != len(self):
602602
raise ValueError("Cannot divide vectors with unequal lengths")
603603

604-
elif is_timedelta64_dtype(other):
604+
elif is_timedelta64_dtype(other.dtype):
605605
# let numpy handle it
606606
return self._data / other
607607

608-
elif is_object_dtype(other):
608+
elif is_object_dtype(other.dtype):
609609
# Note: we do not do type inference on the result, so either
610610
# an object array or numeric-dtyped (if numpy does inference)
611611
# will be returned. GH#23829
@@ -649,12 +649,12 @@ def __rtruediv__(self, other):
649649
if len(other) != len(self):
650650
raise ValueError("Cannot divide vectors with unequal lengths")
651651

652-
elif is_timedelta64_dtype(other):
652+
elif is_timedelta64_dtype(other.dtype):
653653
# let numpy handle it
654654
return other / self._data
655655

656-
elif is_object_dtype(other):
657-
# Note: unlike in __truediv__, we do not _need_ to do type#
656+
elif is_object_dtype(other.dtype):
657+
# Note: unlike in __truediv__, we do not _need_ to do type
658658
# inference on the result. It does not raise, a numeric array
659659
# is returned. GH#23829
660660
result = [other[n] / self[n] for n in range(len(self))]
@@ -701,7 +701,7 @@ def __floordiv__(self, other):
701701
if len(other) != len(self):
702702
raise ValueError("Cannot divide with unequal lengths")
703703

704-
elif is_timedelta64_dtype(other):
704+
elif is_timedelta64_dtype(other.dtype):
705705
other = type(self)(other)
706706

707707
# numpy timedelta64 does not natively support floordiv, so operate
@@ -713,15 +713,15 @@ def __floordiv__(self, other):
713713
result[mask] = np.nan
714714
return result
715715

716-
elif is_object_dtype(other):
716+
elif is_object_dtype(other.dtype):
717717
result = [self[n] // other[n] for n in range(len(self))]
718718
result = np.array(result)
719719
if lib.infer_dtype(result, skipna=False) == "timedelta":
720720
result, _ = sequence_to_td64ns(result)
721721
return type(self)(result)
722722
return result
723723

724-
elif is_integer_dtype(other) or is_float_dtype(other):
724+
elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):
725725
result = self._data // other
726726
return type(self)(result)
727727

@@ -763,7 +763,7 @@ def __rfloordiv__(self, other):
763763
if len(other) != len(self):
764764
raise ValueError("Cannot divide with unequal lengths")
765765

766-
elif is_timedelta64_dtype(other):
766+
elif is_timedelta64_dtype(other.dtype):
767767
other = type(self)(other)
768768

769769
# numpy timedelta64 does not natively support floordiv, so operate
@@ -775,7 +775,7 @@ def __rfloordiv__(self, other):
775775
result[mask] = np.nan
776776
return result
777777

778-
elif is_object_dtype(other):
778+
elif is_object_dtype(other.dtype):
779779
result = [other[n] // self[n] for n in range(len(self))]
780780
result = np.array(result)
781781
return result

0 commit comments

Comments
 (0)