Skip to content

Commit e50469d

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into Replace_wildcardimports_in_toplevelinit_as_precursor_for_reshape,stata,io_PRs_#25936_#25940_#25939
2 parents 7aa943b + 2f6b90a commit e50469d

23 files changed

+59
-140
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ I/O
361361
- Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
362362
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`)
363363
- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
364+
- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
364365

365366
Plotting
366367
^^^^^^^^

mypy.ini

-12
Original file line numberDiff line numberDiff line change
@@ -167,18 +167,6 @@ ignore_errors=True
167167
[mypy-pandas.io.stata]
168168
ignore_errors=True
169169

170-
[mypy-pandas.plotting._core]
171-
ignore_errors=True
172-
173-
[mypy-pandas.tseries.frequencies]
174-
ignore_errors=True
175-
176-
[mypy-pandas.tseries.holiday]
177-
ignore_errors=True
178-
179-
[mypy-pandas.tseries.offsets]
180-
ignore_errors=True
181-
182170
[mypy-pandas.util._doctools]
183171
ignore_errors=True
184172

pandas/compat/__init__.py

+3-83
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
* lists: lrange(), lmap(), lzip(), lfilter()
99
* iterable method compatibility: iteritems, iterkeys, itervalues
1010
* Uses the original method if available, otherwise uses items, keys, values.
11-
* bind_method: binds functions to classes
1211
* add_metaclass(metaclass) - class decorator that recreates class with the
1312
given metaclass instead (and avoids intermediary class creation)
1413
@@ -22,10 +21,7 @@
2221
from distutils.version import LooseVersion
2322
import sys
2423
import platform
25-
import types
2624
import struct
27-
import inspect
28-
from collections import namedtuple
2925

3026
PY2 = sys.version_info[0] == 2
3127
PY3 = sys.version_info[0] >= 3
@@ -34,8 +30,6 @@
3430
PY37 = sys.version_info >= (3, 7)
3531
PYPY = platform.python_implementation() == 'PyPy'
3632

37-
from pandas.compat.chainmap import DeepChainMap
38-
3933

4034
# list-producing versions of the major Python iterating functions
4135
def lrange(*args, **kwargs):
@@ -54,58 +48,6 @@ def lfilter(*args, **kwargs):
5448
return list(filter(*args, **kwargs))
5549

5650

57-
if PY3:
58-
def isidentifier(s):
59-
return s.isidentifier()
60-
61-
def str_to_bytes(s, encoding=None):
62-
return s.encode(encoding or 'ascii')
63-
64-
def bytes_to_str(b, encoding=None):
65-
return b.decode(encoding or 'utf-8')
66-
67-
# The signature version below is directly copied from Django,
68-
# https://github.com/django/django/pull/4846
69-
def signature(f):
70-
sig = inspect.signature(f)
71-
args = [
72-
p.name for p in sig.parameters.values()
73-
if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
74-
]
75-
varargs = [
76-
p.name for p in sig.parameters.values()
77-
if p.kind == inspect.Parameter.VAR_POSITIONAL
78-
]
79-
varargs = varargs[0] if varargs else None
80-
keywords = [
81-
p.name for p in sig.parameters.values()
82-
if p.kind == inspect.Parameter.VAR_KEYWORD
83-
]
84-
keywords = keywords[0] if keywords else None
85-
defaults = [
86-
p.default for p in sig.parameters.values()
87-
if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
88-
and p.default is not p.empty
89-
] or None
90-
argspec = namedtuple('Signature', ['args', 'defaults',
91-
'varargs', 'keywords'])
92-
return argspec(args, defaults, varargs, keywords)
93-
else:
94-
# Python 2
95-
_name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
96-
97-
def isidentifier(s, dotted=False):
98-
return bool(_name_re.match(s))
99-
100-
def str_to_bytes(s, encoding='ascii'):
101-
return s
102-
103-
def bytes_to_str(b, encoding='ascii'):
104-
return b
105-
106-
def signature(f):
107-
return inspect.getargspec(f)
108-
10951
if PY2:
11052
def iteritems(obj, **kw):
11153
return obj.iteritems(**kw)
@@ -126,30 +68,6 @@ def iterkeys(obj, **kw):
12668
def itervalues(obj, **kw):
12769
return iter(obj.values(**kw))
12870

129-
130-
def bind_method(cls, name, func):
131-
"""Bind a method to class, python 2 and python 3 compatible.
132-
133-
Parameters
134-
----------
135-
136-
cls : type
137-
class to receive bound method
138-
name : basestring
139-
name of method on class instance
140-
func : function
141-
function to be bound as method
142-
143-
144-
Returns
145-
-------
146-
None
147-
"""
148-
# only python 2 has bound/unbound method issue
149-
if not PY3:
150-
setattr(cls, name, types.MethodType(func, None, cls))
151-
else:
152-
setattr(cls, name, func)
15371
# ----------------------------------------------------------------------------
15472
# functions largely based / taken from the six module
15573

@@ -164,7 +82,7 @@ def to_str(s):
16482
Convert bytes and non-string into Python 3 str
16583
"""
16684
if isinstance(s, bytes):
167-
s = bytes_to_str(s)
85+
s = s.decode('utf-8')
16886
elif not isinstance(s, str):
16987
s = str(s)
17088
return s
@@ -203,6 +121,7 @@ def wrapper(cls):
203121
return metaclass(cls.__name__, cls.__bases__, orig_vars)
204122
return wrapper
205123

124+
206125
if PY3:
207126
def raise_with_traceback(exc, traceback=Ellipsis):
208127
if traceback == Ellipsis:
@@ -238,6 +157,7 @@ def raise_with_traceback(exc, traceback=Ellipsis):
238157
else:
239158
re_type = type(re.compile(''))
240159

160+
241161
# https://github.com/pandas-dev/pandas/pull/9123
242162
def is_platform_little_endian():
243163
""" am I little endian """

pandas/core/apply.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
import inspect
12
import warnings
23

34
import numpy as np
45

56
from pandas._libs import reduction
6-
import pandas.compat as compat
77
from pandas.util._decorators import cache_readonly
88

99
from pandas.core.dtypes.common import (
@@ -123,7 +123,7 @@ def get_result(self):
123123
# Some methods (shift, etc.) require the axis argument, others
124124
# don't, so inspect and insert if necessary.
125125
func = getattr(self.obj, self.f)
126-
sig = compat.signature(func)
126+
sig = inspect.getfullargspec(func)
127127
if 'axis' in sig.args:
128128
self.kwds['axis'] = self.axis
129129
return func(*self.args, **self.kwds)

pandas/core/computation/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77

88
from pandas._libs.tslibs import Timedelta, Timestamp
9-
from pandas.compat import DeepChainMap
9+
from pandas.compat.chainmap import DeepChainMap
1010

1111
from pandas.core.dtypes.common import is_list_like
1212

pandas/core/computation/scope.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import numpy as np
1414

1515
from pandas._libs.tslibs import Timestamp
16-
from pandas.compat import DeepChainMap
16+
from pandas.compat.chainmap import DeepChainMap
1717

1818
from pandas.core.base import StringMixin
1919
import pandas.core.computation as compu

pandas/core/generic.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from pandas._libs import Timestamp, iNaT, properties
1818
import pandas.compat as compat
19-
from pandas.compat import isidentifier, lrange, lzip, set_function_name, to_str
19+
from pandas.compat import lrange, lzip, set_function_name, to_str
2020
from pandas.compat.numpy import function as nv
2121
from pandas.errors import AbstractMethodError
2222
from pandas.util._decorators import (
@@ -5150,7 +5150,7 @@ def _dir_additions(self):
51505150
If info_axis is a MultiIndex, its first level values are used.
51515151
"""
51525152
additions = {c for c in self._info_axis.unique(level=0)[:100]
5153-
if isinstance(c, str) and isidentifier(c)}
5153+
if isinstance(c, str) and c.isidentifier()}
51545154
return super(NDFrame, self)._dir_additions().union(additions)
51555155

51565156
# ----------------------------------------------------------------------
@@ -8774,22 +8774,22 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
87748774
.. versionadded:: 0.18.1
87758775
A callable can be used as other.
87768776
8777-
inplace : boolean, default False
8777+
inplace : bool, default False
87788778
Whether to perform the operation in place on the data.
87798779
axis : int, default None
87808780
Alignment axis if needed.
87818781
level : int, default None
87828782
Alignment level if needed.
8783-
errors : str, {'raise', 'ignore'}, default `raise`
8783+
errors : str, {'raise', 'ignore'}, default 'raise'
87848784
Note that currently this parameter won't affect
87858785
the results and will always coerce to a suitable dtype.
87868786
8787-
- `raise` : allow exceptions to be raised.
8788-
- `ignore` : suppress exceptions. On error return original object.
8787+
- 'raise' : allow exceptions to be raised.
8788+
- 'ignore' : suppress exceptions. On error return original object.
87898789
8790-
try_cast : boolean, default False
8790+
try_cast : bool, default False
87918791
Try to cast the result back to the input type (if possible).
8792-
raise_on_error : boolean, default True
8792+
raise_on_error : bool, default True
87938793
Whether to raise on invalid data types (e.g. trying to where on
87948794
strings).
87958795
@@ -8799,7 +8799,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
87998799
88008800
Returns
88018801
-------
8802-
wh : same type as caller
8802+
Same type as caller
88038803
88048804
See Also
88058805
--------
@@ -8848,6 +8848,13 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
88488848
dtype: int64
88498849
88508850
>>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
8851+
>>> df
8852+
A B
8853+
0 0 1
8854+
1 2 3
8855+
2 4 5
8856+
3 6 7
8857+
4 8 9
88518858
>>> m = df %% 3 == 0
88528859
>>> df.where(m, -df)
88538860
A B

pandas/core/ops.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
from pandas._libs import algos as libalgos, lib, ops as libops
1414
import pandas.compat as compat
15-
from pandas.compat import bind_method
1615
from pandas.errors import NullFrequencyError
1716
from pandas.util._decorators import Appender
1817

@@ -1545,7 +1544,7 @@ def add_methods(cls, new_methods):
15451544
force = not (issubclass(cls, ABCSparseArray) and
15461545
name.startswith('__i'))
15471546
if force or name not in cls.__dict__:
1548-
bind_method(cls, name, method)
1547+
setattr(cls, name, method)
15491548

15501549

15511550
# ----------------------------------------------------------------------

pandas/io/clipboards.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
from io import StringIO
33
import warnings
44

5-
import pandas.compat as compat
6-
75
from pandas.core.dtypes.generic import ABCDataFrame
86

97
from pandas import get_option, option_context
@@ -38,10 +36,8 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
3836

3937
# Try to decode (if needed, as "text" might already be a string here).
4038
try:
41-
text = compat.bytes_to_str(
42-
text, encoding=(kwargs.get('encoding') or
43-
get_option('display.encoding'))
44-
)
39+
text = text.decode(kwargs.get('encoding')
40+
or get_option('display.encoding'))
4541
except AttributeError:
4642
pass
4743

pandas/io/common.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from urllib.request import pathname2url, urlopen
1717
import zipfile
1818

19-
import pandas.compat as compat
2019
from pandas.errors import ( # noqa
2120
AbstractMethodError, DtypeWarning, EmptyDataError, ParserError,
2221
ParserWarning)
@@ -460,7 +459,7 @@ def __next__(self):
460459

461460
# readline returns bytes, not str, but Python's CSV reader
462461
# expects str, so convert the output to str before continuing
463-
newline = compat.bytes_to_str(newline)
462+
newline = newline.decode('utf-8')
464463

465464
# mmap doesn't raise if reading past the allocated
466465
# data but instead returns an empty string, so raise

pandas/io/s3.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
def _strip_schema(url):
1212
"""Returns the url without the s3:// part"""
13-
result = parse_url(url)
13+
result = parse_url(url, allow_fragments=False)
1414
return result.netloc + result.path
1515

1616

pandas/plotting/_core.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=E1101
33
from collections import namedtuple
44
import re
5+
from typing import List, Optional, Type
56
import warnings
67

78
import numpy as np
@@ -78,7 +79,7 @@ def _kind(self):
7879

7980
_layout_type = 'vertical'
8081
_default_rot = 0
81-
orientation = None
82+
orientation = None # type: Optional[str]
8283
_pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog',
8384
'mark_right', 'stacked']
8485
_attr_defaults = {'logy': False, 'logx': False, 'loglog': False,
@@ -1723,7 +1724,8 @@ def result(self):
17231724
_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds
17241725

17251726
_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot,
1726-
ScatterPlot, HexBinPlot, AreaPlot, PiePlot]
1727+
ScatterPlot, HexBinPlot, AreaPlot, PiePlot] \
1728+
# type: List[Type[MPLPlot]]
17271729

17281730
_plot_klass = {klass._kind: klass for klass in _klasses}
17291731

pandas/tests/io/conftest.py

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def s3_resource(tips_file, jsonl_file):
5959
moto = pytest.importorskip('moto')
6060

6161
test_s3_files = [
62+
('tips#1.csv', tips_file),
6263
('tips.csv', tips_file),
6364
('tips.csv.gz', tips_file + '.gz'),
6465
('tips.csv.bz2', tips_file + '.bz2'),

pandas/tests/io/formats/test_printing.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pandas._config.config as cf
66

77
import pandas as pd
8-
from pandas import compat
98

109
import pandas.io.formats.format as fmt
1110
import pandas.io.formats.printing as printing
@@ -27,7 +26,7 @@ def test_repr_binary_type():
2726
raw = bytes(letters, encoding=cf.get_option('display.encoding'))
2827
except TypeError:
2928
raw = bytes(letters)
30-
b = str(compat.bytes_to_str(raw))
29+
b = str(raw.decode('utf-8'))
3130
res = printing.pprint_thing(b, quote_strings=True)
3231
assert res == repr(b)
3332
res = printing.pprint_thing(b, quote_strings=False)

0 commit comments

Comments
 (0)