From eb2f904354c5c6837b8cc86e362ea93d8352caed Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 5 Apr 2019 22:37:42 -0700 Subject: [PATCH 1/8] CLN: Misc PY2/3 compat functions --- pandas/compat/__init__.py | 49 +----------------------- pandas/core/computation/pytables.py | 2 +- pandas/core/computation/scope.py | 2 +- pandas/core/generic.py | 4 +- pandas/core/ops.py | 3 +- pandas/io/clipboards.py | 6 +-- pandas/io/common.py | 2 +- pandas/tests/io/formats/test_printing.py | 2 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/series/test_api.py | 4 +- 10 files changed, 14 insertions(+), 62 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index c37dae4ae3194..9c929321d02e1 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -22,7 +22,6 @@ from distutils.version import LooseVersion import sys import platform -import types import struct import inspect from collections import namedtuple @@ -34,8 +33,6 @@ PY37 = sys.version_info >= (3, 7) PYPY = platform.python_implementation() == 'PyPy' -from pandas.compat.chainmap import DeepChainMap - # list-producing versions of the major Python iterating functions def lrange(*args, **kwargs): @@ -55,15 +52,6 @@ def lfilter(*args, **kwargs): if PY3: - def isidentifier(s): - return s.isidentifier() - - def str_to_bytes(s, encoding=None): - return s.encode(encoding or 'ascii') - - def bytes_to_str(b, encoding=None): - return b.decode(encoding or 'utf-8') - # The signature version below is directly copied from Django, # https://github.com/django/django/pull/4846 def signature(f): @@ -92,17 +80,6 @@ def signature(f): return argspec(args, defaults, varargs, keywords) else: # Python 2 - _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") - - def isidentifier(s, dotted=False): - return bool(_name_re.match(s)) - - def str_to_bytes(s, encoding='ascii'): - return s - - def bytes_to_str(b, encoding='ascii'): - return b - def signature(f): return inspect.getargspec(f) @@ -126,30 +103,6 @@ def iterkeys(obj, **kw): def itervalues(obj, **kw): return iter(obj.values(**kw)) - -def bind_method(cls, name, func): - """Bind a method to class, python 2 and python 3 compatible. - - Parameters - ---------- - - cls : type - class to receive bound method - name : basestring - name of method on class instance - func : function - function to be bound as method - - - Returns - ------- - None - """ - # only python 2 has bound/unbound method issue - if not PY3: - setattr(cls, name, types.MethodType(func, None, cls)) - else: - setattr(cls, name, func) # ---------------------------------------------------------------------------- # functions largely based / taken from the six module @@ -164,7 +117,7 @@ def to_str(s): Convert bytes and non-string into Python 3 str """ if isinstance(s, bytes): - s = bytes_to_str(s) + s = s.decode('utf-8') elif not isinstance(s, str): s = str(s) return s diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 52e255446bd1f..6b03237d5991e 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -6,7 +6,7 @@ import numpy as np from pandas._libs.tslibs import Timedelta, Timestamp -from pandas.compat import DeepChainMap +from pandas.compat.chainmap import DeepChainMap from pandas.core.dtypes.common import is_list_like diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 62d78b3e207e0..185b5dd36e4fc 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -13,7 +13,7 @@ import numpy as np from pandas._libs.tslibs import Timestamp -from pandas.compat import DeepChainMap +from pandas.compat.chainmap import DeepChainMap from pandas.core.base import StringMixin import pandas.core.computation as compu diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c12e9e7e04af6..6f74d9545f092 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -16,7 +16,7 @@ from pandas._libs import Timestamp, iNaT, properties import pandas.compat as compat -from pandas.compat import isidentifier, lrange, lzip, set_function_name, to_str +from pandas.compat import lrange, lzip, set_function_name, to_str from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -5150,7 +5150,7 @@ def _dir_additions(self): If info_axis is a MultiIndex, it's first level values are used. """ additions = {c for c in self._info_axis.unique(level=0)[:100] - if isinstance(c, str) and isidentifier(c)} + if isinstance(c, str) and c.isidentifier()} return super(NDFrame, self)._dir_additions().union(additions) # ---------------------------------------------------------------------- diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 92465a4acd9d7..78610e9b3ed9c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,6 +6,7 @@ import datetime import operator import textwrap +import types import warnings import numpy as np @@ -1545,7 +1546,7 @@ def add_methods(cls, new_methods): force = not (issubclass(cls, ABCSparseArray) and name.startswith('__i')) if force or name not in cls.__dict__: - bind_method(cls, name, method) + setattr(cls, name, types.MethodType(method, None, cls)) # ---------------------------------------------------------------------- diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index d5c9a1f3ec2cf..33c468a725150 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -38,10 +38,8 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover # Try to decode (if needed, as "text" might already be a string here). try: - text = compat.bytes_to_str( - text, encoding=(kwargs.get('encoding') or - get_option('display.encoding')) - ) + text = text.decode(kwargs.get('encoding') or + get_option('display.encoding')) except AttributeError: pass diff --git a/pandas/io/common.py b/pandas/io/common.py index bcbfa7930311e..2352a3fc3eaf3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -460,7 +460,7 @@ def __next__(self): # readline returns bytes, not str, but Python's CSV reader # expects str, so convert the output to str before continuing - newline = compat.bytes_to_str(newline) + newline = newline.decode('utf-8') # mmap doesn't raise if reading past the allocated # data but instead returns an empty string, so raise diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 1c4c97e832931..39e9b8cdc8920 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -27,7 +27,7 @@ def test_repr_binary_type(): raw = bytes(letters, encoding=cf.get_option('display.encoding')) except TypeError: raw = bytes(letters) - b = str(compat.bytes_to_str(raw)) + b = str(raw.decode('utf-8')) res = printing.pprint_thing(b, quote_strings=True) assert res == repr(b) res = printing.pprint_thing(b, quote_strings=False) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index e947100ba8605..0e0a0ae6e34da 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -654,7 +654,7 @@ def test_decode_big_escape(self): # Make sure no Exception is raised. for _ in range(10): base = '\u00e5'.encode("utf-8") - quote = compat.str_to_bytes("\"") + quote = "\"".encode('ascii') escape_input = quote + (base * 1024 * 1024 * 2) + quote ujson.decode(escape_input) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index e4f8bf1dee18b..81642f65f05a4 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -8,7 +8,7 @@ import pytest import pandas.compat as compat -from pandas.compat import isidentifier, lzip +from pandas.compat import lzip import pandas as pd from pandas import ( @@ -282,7 +282,7 @@ def test_index_tab_completion(self, index): for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert (not isinstance(x, str) or - not isidentifier(x) or x in dir_s) + not x.isidentifier() or x in dir_s) else: assert x not in dir_s From 5853e4bf60c6c60b38699cadc59fc41b6b76e70c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 5 Apr 2019 22:57:12 -0700 Subject: [PATCH 2/8] Remove import --- pandas/compat/__init__.py | 1 - pandas/core/ops.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 9c929321d02e1..88e088070454b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -8,7 +8,6 @@ * lists: lrange(), lmap(), lzip(), lfilter() * iterable method compatibility: iteritems, iterkeys, itervalues * Uses the original method if available, otherwise uses items, keys, values. -* bind_method: binds functions to classes * add_metaclass(metaclass) - class decorator that recreates class with with the given metaclass instead (and avoids intermediary class creation) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 78610e9b3ed9c..84134926f1ce8 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -13,7 +13,6 @@ from pandas._libs import algos as libalgos, lib, ops as libops import pandas.compat as compat -from pandas.compat import bind_method from pandas.errors import NullFrequencyError from pandas.util._decorators import Appender From 598f415aa5832eda183bcd72ed911d81fc0abe59 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 6 Apr 2019 10:06:58 -0700 Subject: [PATCH 3/8] Fix bind_method replacement --- pandas/core/ops.py | 3 +-- pandas/io/clipboards.py | 4 ++-- pandas/tests/io/json/test_ujson.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 84134926f1ce8..b7e24ccad3736 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,7 +6,6 @@ import datetime import operator import textwrap -import types import warnings import numpy as np @@ -1545,7 +1544,7 @@ def add_methods(cls, new_methods): force = not (issubclass(cls, ABCSparseArray) and name.startswith('__i')) if force or name not in cls.__dict__: - setattr(cls, name, types.MethodType(method, None, cls)) + setattr(cls, name, method) # ---------------------------------------------------------------------- diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 33c468a725150..76168315062c7 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -37,9 +37,9 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover text = clipboard_get() # Try to decode (if needed, as "text" might already be a string here). + encoding = kwargs.get('encoding') or get_option('display.encoding') try: - text = text.decode(kwargs.get('encoding') or - get_option('display.encoding')) + text = text.decode(encoding) except AttributeError: pass diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 0e0a0ae6e34da..3f894dfdef127 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -654,7 +654,7 @@ def test_decode_big_escape(self): # Make sure no Exception is raised. for _ in range(10): base = '\u00e5'.encode("utf-8") - quote = "\"".encode('ascii') + quote = b"\"" escape_input = quote + (base * 1024 * 1024 * 2) + quote ujson.decode(escape_input) From 8b03b78fb9cf45fd6ae3b197973f8e3e50afcfe4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 6 Apr 2019 13:46:48 -0700 Subject: [PATCH 4/8] Remove unused imports --- pandas/io/clipboards.py | 2 -- pandas/io/common.py | 1 - pandas/tests/io/formats/test_printing.py | 1 - 3 files changed, 4 deletions(-) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 76168315062c7..cc56868a576d6 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -2,8 +2,6 @@ from io import StringIO import warnings -import pandas.compat as compat - from pandas.core.dtypes.generic import ABCDataFrame from pandas import get_option, option_context diff --git a/pandas/io/common.py b/pandas/io/common.py index 2352a3fc3eaf3..3bdfe52a27e7e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -16,7 +16,6 @@ from urllib.request import pathname2url, urlopen import zipfile -import pandas.compat as compat from pandas.errors import ( # noqa AbstractMethodError, DtypeWarning, EmptyDataError, ParserError, ParserWarning) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 39e9b8cdc8920..5ec9114675fdb 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -5,7 +5,6 @@ import pandas._config.config as cf import pandas as pd -from pandas import compat import pandas.io.formats.format as fmt import pandas.io.formats.printing as printing From 89c1901f3d3771db26037d6d4dfad5e4802453a7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 7 Apr 2019 19:17:19 -0700 Subject: [PATCH 5/8] Retrigger CI --- pandas/compat/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 047874f5c85a7..8e34d826a3fff 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -156,6 +156,7 @@ def raise_with_traceback(exc, traceback=Ellipsis): else: re_type = type(re.compile('')) + # https://github.com/pandas-dev/pandas/pull/9123 def is_platform_little_endian(): """ am I little endian """ From 969a4e78f605c96878f6c2c2cc80d401ed4c476b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 8 Apr 2019 10:10:12 -0700 Subject: [PATCH 6/8] Put encoding sourcing in try except --- pandas/io/clipboards.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index cc56868a576d6..be1256edf7afe 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -35,9 +35,9 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover text = clipboard_get() # Try to decode (if needed, as "text" might already be a string here). - encoding = kwargs.get('encoding') or get_option('display.encoding') try: - text = text.decode(encoding) + text = text.decode(kwargs.get('encoding') + or get_option('display.encoding')) except AttributeError: pass From a45354a99d6d77deaa478989b67d8490871a361b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 8 Apr 2019 11:55:04 -0700 Subject: [PATCH 7/8] Clarify quote argument --- pandas/tests/io/json/test_ujson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 3f894dfdef127..8bf315b73366e 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -654,7 +654,7 @@ def test_decode_big_escape(self): # Make sure no Exception is raised. for _ in range(10): base = '\u00e5'.encode("utf-8") - quote = b"\"" + quote = b'"' escape_input = quote + (base * 1024 * 1024 * 2) + quote ujson.decode(escape_input) From 56abc6461b337ccee236bc1bfbbd2a26b6b94a8f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 8 Apr 2019 13:18:08 -0700 Subject: [PATCH 8/8] Retrigger CI --- pandas/compat/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 8e34d826a3fff..5a52479a4bfcb 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -121,6 +121,7 @@ def wrapper(cls): return metaclass(cls.__name__, cls.__bases__, orig_vars) return wrapper + if PY3: def raise_with_traceback(exc, traceback=Ellipsis): if traceback == Ellipsis: