Skip to content

Commit 5f76f48

Browse files
committed
merge with upstream
2 parents f216a43 + 80f0a74 commit 5f76f48

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+374
-303
lines changed

ci/deps/travis-37-cov.yaml

+4-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
name: pandas-dev
22
channels:
3-
- defaults
43
- conda-forge
54
dependencies:
65
- python=3.7.*
@@ -15,7 +14,6 @@ dependencies:
1514
# pandas dependencies
1615
- beautifulsoup4
1716
- botocore>=1.11
18-
- cython>=0.29.16
1917
- dask
2018
- fastparquet>=0.3.2
2119
- fsspec>=0.7.4
@@ -31,16 +29,18 @@ dependencies:
3129
- odfpy
3230
- openpyxl
3331
- pandas-gbq
32+
- google-cloud-bigquery>=1.27.2 # GH 36436
3433
- psycopg2
3534
- pyarrow>=0.15.0
36-
- pymysql
35+
- pymysql=0.7.11
3736
- pytables
3837
- python-snappy
38+
- python-dateutil
3939
- pytz
4040
- s3fs>=0.4.0
4141
- scikit-learn
4242
- scipy
43-
- sqlalchemy
43+
- sqlalchemy=1.3.0
4444
- statsmodels
4545
- xarray
4646
- xlrd
@@ -51,5 +51,4 @@ dependencies:
5151
- brotlipy
5252
- coverage
5353
- pandas-datareader
54-
- python-dateutil
5554
- pyxlsb

ci/deps/travis-37-locale.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ dependencies:
2525
- numexpr
2626
- numpy
2727
- openpyxl
28-
- pandas-gbq=0.12.0
28+
- pandas-gbq
29+
- google-cloud-bigquery>=1.27.2 # GH 36436
2930
- pyarrow>=0.17
3031
- psycopg2=2.7
31-
- pyarrow>=0.15.0 # GH #35813
3232
- pymysql=0.7.11
3333
- pytables
3434
- python-dateutil

doc/make.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def main():
291291

292292
joined = ", ".join(cmds)
293293
argparser.add_argument(
294-
"command", nargs="?", default="html", help=f"command to run: {joined}",
294+
"command", nargs="?", default="html", help=f"command to run: {joined}"
295295
)
296296
argparser.add_argument(
297297
"--num-jobs", type=int, default=0, help="number of jobs used by sphinx-build"

doc/source/user_guide/computation.rst

+9
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,15 @@ see the :ref:`groupby docs <groupby.transform.window_resample>`.
229229

230230
The API for window statistics is quite similar to the way one works with ``GroupBy`` objects, see the documentation :ref:`here <groupby>`.
231231

232+
.. warning::
233+
234+
When using ``rolling()`` and an associated function the results are calculated with rolling sums. As a consequence
235+
when having values differing with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be
236+
noted that large values may have an impact on windows which do not include these values. `Kahan summation
237+
<https://en.wikipedia.org/wiki/Kahan_summation_algorithm>`__ is used
238+
to compute the rolling sums to preserve accuracy as much as possible. The same holds true for ``Rolling.var()`` for
239+
values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`.
240+
232241
We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding
233242
objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.ExponentialMovingWindow`.
234243

doc/source/whatsnew/v1.1.3.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ Fixed regressions
3434
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`)
3535
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`)
3636
- Fixed regression in :meth:`read_excel` with ``engine="odf"`` causing ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`)
37-
-
37+
- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`)
3838

3939
.. ---------------------------------------------------------------------------
4040
@@ -47,6 +47,7 @@ Bug fixes
4747
- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`)
4848
- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`)
4949
- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`)
50+
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)
5051

5152
.. ---------------------------------------------------------------------------
5253

doc/source/whatsnew/v1.2.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ Deprecations
212212
- Deprecated parameter ``dtype`` in :meth:`Index.copy` on all index classes. Use the :meth:`Index.astype` method instead for changing dtype (:issue:`35853`)
213213
- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
214214
- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`)
215+
- The :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`)
215216

216217
.. ---------------------------------------------------------------------------
217218
@@ -225,6 +226,7 @@ Performance improvements
225226
- Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
226227
- Performance improvements when creating :meth:`pd.Series.map` from a huge dictionary (:issue:`34717`)
227228
- Performance improvement in :meth:`GroupBy.transform` with the ``numba`` engine (:issue:`36240`)
229+
- ``Styler`` uuid method altered to compress data transmission over web whilst maintaining reasonably low table collision probability (:issue:`36345`)
228230
- Performance improvement in :meth:`pd.to_datetime` with non-`ns` time unit for `float` `dtype` columns (:issue:`20445`)
229231

230232
.. ---------------------------------------------------------------------------
@@ -368,7 +370,7 @@ Other
368370
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
369371
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
370372
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`)
371-
-
373+
- Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`)
372374

373375
.. ---------------------------------------------------------------------------
374376

pandas/_config/display.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def detect_console_encoding() -> str:
2222
encoding = None
2323
try:
2424
encoding = sys.stdout.encoding or sys.stdin.encoding
25-
except (AttributeError, IOError):
25+
except (AttributeError, OSError):
2626
pass
2727

2828
# try again for something better

pandas/_libs/reduction.pyx

+12-5
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ from pandas._libs cimport util
1616
from pandas._libs.lib import is_scalar, maybe_convert_objects
1717

1818

19-
cdef _check_result_array(object obj, Py_ssize_t cnt):
19+
cpdef check_result_array(object obj, Py_ssize_t cnt):
2020

2121
if (util.is_array(obj) or
2222
(isinstance(obj, list) and len(obj) == cnt) or
2323
getattr(obj, 'shape', None) == (cnt,)):
24-
raise ValueError('Function does not reduce')
24+
raise ValueError('Must produce aggregated value')
2525

2626

2727
cdef class _BaseGrouper:
@@ -74,12 +74,14 @@ cdef class _BaseGrouper:
7474
cached_ityp._engine.clear_mapping()
7575
cached_ityp._cache.clear() # e.g. inferred_freq must go
7676
res = self.f(cached_typ)
77-
res = _extract_result(res)
77+
res = extract_result(res)
7878
if not initialized:
7979
# On the first pass, we check the output shape to see
8080
# if this looks like a reduction.
8181
initialized = True
82-
_check_result_array(res, len(self.dummy_arr))
82+
# In all tests other than test_series_grouper and
83+
# test_series_bin_grouper, we have len(self.dummy_arr) == 0
84+
check_result_array(res, len(self.dummy_arr))
8385

8486
return res, initialized
8587

@@ -278,9 +280,14 @@ cdef class SeriesGrouper(_BaseGrouper):
278280
return result, counts
279281

280282

281-
cdef inline _extract_result(object res, bint squeeze=True):
283+
cpdef inline extract_result(object res, bint squeeze=True):
282284
""" extract the result object, it might be a 0-dim ndarray
283285
or a len-1 0-dim, or a scalar """
286+
if hasattr(res, "_values"):
287+
# Preserve EA
288+
res = res._values
289+
if squeeze and res.ndim == 1 and len(res) == 1:
290+
res = res[0]
284291
if hasattr(res, 'values') and util.is_array(res.values):
285292
res = res.values
286293
if util.is_array(res):

pandas/_testing.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1960,8 +1960,7 @@ def index_subclass_makers_generator():
19601960
makeCategoricalIndex,
19611961
makeMultiIndex,
19621962
]
1963-
for make_index_func in make_index_funcs:
1964-
yield make_index_func
1963+
yield from make_index_funcs
19651964

19661965

19671966
def all_timeseries_index_generator(k=10):

pandas/_vendored/typing_extensions.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ def __repr__(self):
409409

410410
def __getitem__(self, parameters):
411411
item = typing._type_check(
412-
parameters, "{} accepts only single type".format(self._name)
412+
parameters, f"{self._name} accepts only single type"
413413
)
414414
return _GenericAlias(self, (item,))
415415

@@ -1671,7 +1671,7 @@ def __class_getitem__(cls, params):
16711671
params = (params,)
16721672
if not params and cls is not Tuple:
16731673
raise TypeError(
1674-
"Parameter list to {}[...] cannot be empty".format(cls.__qualname__)
1674+
f"Parameter list to {cls.__qualname__}[...] cannot be empty"
16751675
)
16761676
msg = "Parameters to generic types must be types."
16771677
params = tuple(_type_check(p, msg) for p in params)
@@ -2113,7 +2113,7 @@ def __class_getitem__(cls, params):
21132113
return _AnnotatedAlias(origin, metadata)
21142114

21152115
def __init_subclass__(cls, *args, **kwargs):
2116-
raise TypeError("Cannot subclass {}.Annotated".format(cls.__module__))
2116+
raise TypeError(f"Cannot subclass {cls.__module__}.Annotated")
21172117

21182118
def _strip_annotations(t):
21192119
"""Strips the annotations from a given type.
@@ -2195,7 +2195,7 @@ def _tree_repr(self, tree):
21952195
else:
21962196
tp_repr = origin[0]._tree_repr(origin)
21972197
metadata_reprs = ", ".join(repr(arg) for arg in metadata)
2198-
return "%s[%s, %s]" % (cls, tp_repr, metadata_reprs)
2198+
return f"{cls}[{tp_repr}, {metadata_reprs}]"
21992199

22002200
def _subs_tree(self, tvars=None, args=None): # noqa
22012201
if self is Annotated:
@@ -2382,7 +2382,7 @@ def TypeAlias(self, parameters):
23822382
23832383
It's invalid when used anywhere except as in the example above.
23842384
"""
2385-
raise TypeError("{} is not subscriptable".format(self))
2385+
raise TypeError(f"{self} is not subscriptable")
23862386

23872387

23882388
elif sys.version_info[:2] >= (3, 7):

pandas/_version.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
7474
stderr=(subprocess.PIPE if hide_stderr else None),
7575
)
7676
break
77-
except EnvironmentError:
77+
except OSError:
7878
e = sys.exc_info()[1]
7979
if e.errno == errno.ENOENT:
8080
continue
@@ -121,7 +121,7 @@ def git_get_keywords(versionfile_abs):
121121
# _version.py.
122122
keywords = {}
123123
try:
124-
f = open(versionfile_abs, "r")
124+
f = open(versionfile_abs)
125125
for line in f.readlines():
126126
if line.strip().startswith("git_refnames ="):
127127
mo = re.search(r'=\s*"(.*)"', line)
@@ -132,7 +132,7 @@ def git_get_keywords(versionfile_abs):
132132
if mo:
133133
keywords["full"] = mo.group(1)
134134
f.close()
135-
except EnvironmentError:
135+
except OSError:
136136
pass
137137
return keywords
138138

pandas/core/algorithms.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
440440
# GH16012
441441
# Ensure np.in1d doesn't get object types or it *may* throw an exception
442442
if len(comps) > 1_000_000 and not is_object_dtype(comps):
443-
f = np.in1d
443+
            # If the values include nan we need to check for nan explicitly
444+
            # since np.nan is not equal to np.nan
445+
if np.isnan(values).any():
446+
f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
447+
else:
448+
f = np.in1d
444449
elif is_integer_dtype(comps):
445450
try:
446451
values = values.astype("int64", copy=False)

pandas/core/arrays/datetimes.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -570,8 +570,7 @@ def __iter__(self):
570570
converted = ints_to_pydatetime(
571571
data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp"
572572
)
573-
for v in converted:
574-
yield v
573+
yield from converted
575574

576575
def astype(self, dtype, copy=True):
577576
# We handle

pandas/core/arrays/interval.py

+36-32
Original file line numberDiff line numberDiff line change
@@ -547,38 +547,7 @@ def __getitem__(self, value):
547547
return self._shallow_copy(left, right)
548548

549549
def __setitem__(self, key, value):
550-
# na value: need special casing to set directly on numpy arrays
551-
needs_float_conversion = False
552-
if is_scalar(value) and isna(value):
553-
if is_integer_dtype(self.dtype.subtype):
554-
# can't set NaN on a numpy integer array
555-
needs_float_conversion = True
556-
elif is_datetime64_any_dtype(self.dtype.subtype):
557-
# need proper NaT to set directly on the numpy array
558-
value = np.datetime64("NaT")
559-
elif is_timedelta64_dtype(self.dtype.subtype):
560-
# need proper NaT to set directly on the numpy array
561-
value = np.timedelta64("NaT")
562-
value_left, value_right = value, value
563-
564-
# scalar interval
565-
elif is_interval_dtype(value) or isinstance(value, Interval):
566-
self._check_closed_matches(value, name="value")
567-
value_left, value_right = value.left, value.right
568-
569-
else:
570-
# list-like of intervals
571-
try:
572-
array = IntervalArray(value)
573-
value_left, value_right = array.left, array.right
574-
except TypeError as err:
575-
# wrong type: not interval or NA
576-
msg = f"'value' should be an interval type, got {type(value)} instead."
577-
raise TypeError(msg) from err
578-
579-
if needs_float_conversion:
580-
raise ValueError("Cannot set float NaN to integer-backed IntervalArray")
581-
550+
value_left, value_right = self._validate_setitem_value(value)
582551
key = check_array_indexer(self, key)
583552

584553
# Need to ensure that left and right are updated atomically, so we're
@@ -898,6 +867,41 @@ def _validate_insert_value(self, value):
898867
)
899868
return left_insert, right_insert
900869

870+
def _validate_setitem_value(self, value):
871+
needs_float_conversion = False
872+
873+
if is_scalar(value) and isna(value):
874+
# na value: need special casing to set directly on numpy arrays
875+
if is_integer_dtype(self.dtype.subtype):
876+
# can't set NaN on a numpy integer array
877+
needs_float_conversion = True
878+
elif is_datetime64_any_dtype(self.dtype.subtype):
879+
# need proper NaT to set directly on the numpy array
880+
value = np.datetime64("NaT")
881+
elif is_timedelta64_dtype(self.dtype.subtype):
882+
# need proper NaT to set directly on the numpy array
883+
value = np.timedelta64("NaT")
884+
value_left, value_right = value, value
885+
886+
elif is_interval_dtype(value) or isinstance(value, Interval):
887+
# scalar interval
888+
self._check_closed_matches(value, name="value")
889+
value_left, value_right = value.left, value.right
890+
891+
else:
892+
try:
893+
# list-like of intervals
894+
array = IntervalArray(value)
895+
value_left, value_right = array.left, array.right
896+
except TypeError as err:
897+
# wrong type: not interval or NA
898+
msg = f"'value' should be an interval type, got {type(value)} instead."
899+
raise TypeError(msg) from err
900+
901+
if needs_float_conversion:
902+
raise ValueError("Cannot set float NaN to integer-backed IntervalArray")
903+
return value_left, value_right
904+
901905
def value_counts(self, dropna=True):
902906
"""
903907
Returns a Series containing counts of each interval.

pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1427,7 +1427,7 @@ def sparse_arithmetic_method(self, other):
14271427
# TODO: look into _wrap_result
14281428
if len(self) != len(other):
14291429
raise AssertionError(
1430-
(f"length mismatch: {len(self)} vs. {len(other)}")
1430+
f"length mismatch: {len(self)} vs. {len(other)}"
14311431
)
14321432
if not isinstance(other, SparseArray):
14331433
dtype = getattr(other, "dtype", None)

pandas/core/common.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ def flatten(l):
6262
"""
6363
for el in l:
6464
if iterable_not_string(el):
65-
for s in flatten(el):
66-
yield s
65+
yield from flatten(el)
6766
else:
6867
yield el
6968

@@ -434,10 +433,8 @@ def random_state(state=None):
434433
return np.random
435434
else:
436435
raise ValueError(
437-
(
438-
"random_state must be an integer, array-like, a BitGenerator, "
439-
"a numpy RandomState, or None"
440-
)
436+
"random_state must be an integer, array-like, a BitGenerator, "
437+
"a numpy RandomState, or None"
441438
)
442439

443440

0 commit comments

Comments
 (0)