From ad97232d55650fff84d16417337664794ab736aa Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 26 Mar 2019 12:54:19 -0700 Subject: [PATCH] CLN: Remove PY2/3 references io directory xref gh-25725 --- pandas/core/arrays/categorical.py | 2 +- pandas/core/series.py | 2 +- pandas/io/clipboard/clipboards.py | 12 +- pandas/io/clipboards.py | 31 ++--- pandas/io/common.py | 175 ++++-------------------- pandas/io/excel/_base.py | 7 +- pandas/io/excel/_util.py | 35 +---- pandas/io/excel/_xlrd.py | 5 +- pandas/io/formats/console.py | 2 +- pandas/io/formats/csvs.py | 4 +- pandas/io/formats/format.py | 4 +- pandas/io/formats/html.py | 2 +- pandas/io/formats/printing.py | 8 +- pandas/io/formats/terminal.py | 132 +----------------- pandas/io/packers.py | 8 +- pandas/io/parsers.py | 23 +--- pandas/io/pickle.py | 6 +- pandas/io/pytables.py | 14 +- pandas/io/s3.py | 7 +- pandas/io/sas/sas_xport.py | 7 +- pandas/io/stata.py | 72 ++++------ pandas/tests/io/formats/test_console.py | 19 --- pandas/tests/io/formats/test_format.py | 7 +- 23 files changed, 111 insertions(+), 473 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 26f42cd13ffe1..1e1120a135a41 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,5 +1,6 @@ # pylint: disable=E1101,W0232 +from shutil import get_terminal_size import textwrap from warnings import warn @@ -38,7 +39,6 @@ from pandas.core.sorting import nargsort from pandas.io.formats import console -from pandas.io.formats.terminal import get_terminal_size from .base import ExtensionArray, _extension_array_shared_docs diff --git a/pandas/core/series.py b/pandas/core/series.py index 8bcab7129feb2..e62fc0aba0877 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2,6 +2,7 @@ Data structure for 1-dimensional cross-sectional and time series data """ from collections import OrderedDict +from shutil import get_terminal_size from textwrap import dedent import warnings @@ -47,7 +48,6 @@ from pandas.core.tools.datetimes import to_datetime import pandas.io.formats.format as fmt -from pandas.io.formats.terminal import get_terminal_size import pandas.plotting._core as gfx # pylint: disable=E1101,E1103 diff --git a/pandas/io/clipboard/clipboards.py b/pandas/io/clipboard/clipboards.py index d6d0ba0a560bb..b7bed084d72f1 100644 --- a/pandas/io/clipboard/clipboards.py +++ b/pandas/io/clipboard/clipboards.py @@ -1,7 +1,5 @@ import subprocess -from pandas.compat import PY2, text_type - from .exceptions import PyperclipException EXCEPT_MSG = """ @@ -66,7 +64,7 @@ def copy_qt(text): def paste_qt(): cb = app.clipboard() - return text_type(cb.text()) + return str(cb.text()) return copy_qt, paste_qt @@ -135,11 +133,7 @@ class ClipboardUnavailable(object): def __call__(self, *args, **kwargs): raise PyperclipException(EXCEPT_MSG) - if PY2: - def __nonzero__(self): - return False - else: - def __bool__(self): - return False + def __bool__(self): + return False return ClipboardUnavailable(), ClipboardUnavailable() diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 23a2b04214e4e..372d74359fe89 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -2,7 +2,7 @@ import warnings import pandas.compat as compat -from pandas.compat import PY2, PY3, StringIO +from pandas.compat import StringIO from pandas.core.dtypes.generic import ABCDataFrame @@ -36,16 +36,14 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover from pandas.io.parsers import read_csv text = clipboard_get() - # try to decode (if needed on PY3) - # Strange. linux py33 doesn't complain, win py33 does - if PY3: - try: - text = compat.bytes_to_str( - text, encoding=(kwargs.get('encoding') or - get_option('display.encoding')) - ) - except AttributeError: - pass + # Try to decode (if needed, as "text" might already be a string here). + try: + text = compat.bytes_to_str( + text, encoding=(kwargs.get('encoding') or + get_option('display.encoding')) + ) + except AttributeError: + pass # Excel copies into clipboard with \t separation # inspect no more then the 10 first lines, if they @@ -75,13 +73,6 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover warnings.warn('read_clipboard with regex separator does not work' ' properly with c engine') - # In PY2, the c table reader first encodes text with UTF-8 but Python - # table reader uses the format of the passed string. For consistency, - # encode strings for python engine so that output from python and c - # engines produce consistent results - if kwargs.get('engine') == 'python' and PY2: - text = text.encode('utf-8') - return read_csv(StringIO(text), sep=sep, **kwargs) @@ -123,11 +114,11 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover if sep is None: sep = '\t' buf = StringIO() + # clipboard_set (pyperclip) expects unicode obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) text = buf.getvalue() - if PY2: - text = text.decode('utf-8') + clipboard_set(text) return except TypeError: diff --git a/pandas/io/common.py b/pandas/io/common.py index a042a0ae3d7d7..df148ab0df109 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -2,23 +2,26 @@ import bz2 import codecs -from contextlib import closing, contextmanager import csv import gzip +from http.client import HTTPException # noqa import lzma import mmap import os +from urllib.error import URLError # noqa +from urllib.parse import ( # noqa + urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params, + uses_relative) +from urllib.request import pathname2url, urlopen import zipfile import pandas.compat as compat -from pandas.compat import BytesIO, StringIO, string_types, text_type +from pandas.compat import BytesIO, string_types, text_type from pandas.errors import ( # noqa AbstractMethodError, DtypeWarning, EmptyDataError, ParserError, ParserWarning) -from pandas.core.dtypes.common import is_file_like, is_number - -from pandas.io.formats.printing import pprint_thing +from pandas.core.dtypes.common import is_file_like # gh-12665: Alias for now and remove later. CParserError = ParserError @@ -31,31 +34,6 @@ '-nan', ''} -if compat.PY3: - from urllib.request import urlopen, pathname2url - _urlopen = urlopen - from urllib.parse import urlparse as parse_url - from urllib.parse import (uses_relative, uses_netloc, uses_params, - urlencode, urljoin) - from urllib.error import URLError - from http.client import HTTPException # noqa -else: - from urllib2 import urlopen as _urlopen - from urllib import urlencode, pathname2url # noqa - from urlparse import urlparse as parse_url - from urlparse import uses_relative, uses_netloc, uses_params, urljoin - from urllib2 import URLError # noqa - from httplib import HTTPException # noqa - from contextlib import contextmanager, closing # noqa - from functools import wraps # noqa - - # @wraps(_urlopen) - @contextmanager - def urlopen(*args, **kwargs): - with closing(_urlopen(*args, **kwargs)) as f: - yield f - - _VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS.discard('') @@ -72,10 +50,6 @@ def __next__(self): raise AbstractMethodError(self) -if not compat.PY3: - BaseIterator.next = lambda self: self.__next__() - - def _is_url(url): """Check to see if a URL has a valid protocol. @@ -189,7 +163,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, ---------- filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), or buffer - encoding : the encoding to use to decode py3 bytes, default is 'utf-8' + compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional + encoding : the encoding to use to decode bytes, default is 'utf-8' mode : str, optional Returns @@ -202,7 +177,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, filepath_or_buffer = _stringify_path(filepath_or_buffer) if _is_url(filepath_or_buffer): - req = _urlopen(filepath_or_buffer) + req = urlopen(filepath_or_buffer) content_encoding = req.headers.get('Content-Encoding', None) if content_encoding == 'gzip': # Override compression based on Content-Encoding header @@ -361,10 +336,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, if compression: - if compat.PY2 and not is_path and encoding: - msg = 'compression with encoding is not yet supported in Python 2' - raise ValueError(msg) - # GZ Compression if compression == 'gzip': if is_path: @@ -376,11 +347,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, elif compression == 'bz2': if is_path: f = bz2.BZ2File(path_or_buf, mode) - elif compat.PY2: - # Python 2's bz2 module can't take file objects, so have to - # run through decompress manually - f = StringIO(bz2.decompress(path_or_buf.read())) - path_or_buf.close() else: f = bz2.BZ2File(path_or_buf) @@ -415,24 +381,19 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, handles.append(f) elif is_path: - if compat.PY2: - # Python 2 - mode = "wb" if mode == "w" else mode - f = open(path_or_buf, mode) - elif encoding: - # Python 3 and encoding + if encoding: + # Encoding f = open(path_or_buf, mode, encoding=encoding, newline="") elif is_text: - # Python 3 and no explicit encoding + # No explicit encoding f = open(path_or_buf, mode, errors='replace', newline="") else: - # Python 3 and binary mode + # Binary mode f = open(path_or_buf, mode) handles.append(f) - # in Python 3, convert BytesIO or fileobjects passed with an encoding - if (compat.PY3 and is_text and - (compression or isinstance(f, need_text_wrapping))): + # Convert BytesIO or file objects passed with an encoding + if is_text and (compression or isinstance(f, need_text_wrapping)): from io import TextIOWrapper f = TextIOWrapper(f, encoding=encoding, newline='') handles.append(f) @@ -499,11 +460,9 @@ def __iter__(self): def __next__(self): newline = self.mmap.readline() - # readline returns bytes, not str, in Python 3, - # but Python's CSV reader expects str, so convert - # the output to str before continuing - if compat.PY3: - newline = compat.bytes_to_str(newline) + # readline returns bytes, not str, but Python's CSV reader + # expects str, so convert the output to str before continuing + newline = compat.bytes_to_str(newline) # mmap doesn't raise if reading past the allocated # data but instead returns an empty string, so raise @@ -513,14 +472,10 @@ def __next__(self): return newline -if not compat.PY3: - MMapWrapper.next = lambda self: self.__next__() - - class UTF8Recoder(BaseIterator): """ - Iterator that reads an encoded stream and reencodes the input to UTF-8 + Iterator that reads an encoded stream and re-encodes the input to UTF-8 """ def __init__(self, f, encoding): @@ -536,82 +491,12 @@ def next(self): return next(self.reader).encode("utf-8") -if compat.PY3: # pragma: no cover - def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): - # ignore encoding - return csv.reader(f, dialect=dialect, **kwds) - - def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds): - return csv.writer(f, dialect=dialect, **kwds) -else: - class UnicodeReader(BaseIterator): - - """ - A CSV reader which will iterate over lines in the CSV file "f", - which is encoded in the given encoding. - - On Python 3, this is replaced (below) by csv.reader, which handles - unicode. - """ - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - f = UTF8Recoder(f, encoding) - self.reader = csv.reader(f, dialect=dialect, **kwds) - - def __next__(self): - row = next(self.reader) - return [compat.text_type(s, "utf-8") for s in row] - - class UnicodeWriter(object): - - """ - A CSV writer which will write rows to CSV file "f", - which is encoded in the given encoding. - """ - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - # Redirect output to a queue - self.queue = StringIO() - self.writer = csv.writer(self.queue, dialect=dialect, **kwds) - self.stream = f - self.encoder = codecs.getincrementalencoder(encoding)() - self.quoting = kwds.get("quoting", None) - - def writerow(self, row): - def _check_as_is(x): - return (self.quoting == csv.QUOTE_NONNUMERIC and - is_number(x)) or isinstance(x, str) - - row = [x if _check_as_is(x) - else pprint_thing(x).encode("utf-8") for x in row] - - self.writer.writerow([s for s in row]) - # Fetch UTF-8 output from the queue ... - data = self.queue.getvalue() - data = data.decode("utf-8") - # ... and re-encode it into the target encoding - data = self.encoder.encode(data) - # write to the target stream - self.stream.write(data) - # empty queue - self.queue.truncate(0) - - def writerows(self, rows): - def _check_as_is(x): - return (self.quoting == csv.QUOTE_NONNUMERIC and - is_number(x)) or isinstance(x, str) - - for i, row in enumerate(rows): - rows[i] = [x if _check_as_is(x) - else pprint_thing(x).encode("utf-8") for x in row] - - self.writer.writerows([[s for s in row] for row in rows]) - # Fetch UTF-8 output from the queue ... - data = self.queue.getvalue() - data = data.decode("utf-8") - # ... and re-encode it into the target encoding - data = self.encoder.encode(data) - # write to the target stream - self.stream.write(data) - # empty queue - self.queue.truncate(0) +# Keeping these class for now because it provides a necessary convenience +# for "dropping" the "encoding" argument from our I/O arguments when +# creating a Unicode I/O object. +def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): + return csv.reader(f, dialect=dialect, **kwds) + + +def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds): + return csv.writer(f, dialect=dialect, **kwds) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a7955bdb5240b..5b09ac2a83bd5 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -19,8 +19,8 @@ from pandas.io.common import _NA_VALUES, _stringify_path, _validate_header_arg from pandas.io.excel._util import ( - _fill_mi_header, _get_default_writer, _maybe_convert_to_string, - _maybe_convert_usecols, _pop_header_name, get_writer) + _fill_mi_header, _get_default_writer, _maybe_convert_usecols, + _pop_header_name, get_writer) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser @@ -476,9 +476,6 @@ def parse(self, if header_names: output[asheetname].columns = output[ asheetname].columns.set_names(header_names) - elif compat.PY2: - output[asheetname].columns = _maybe_convert_to_string( - output[asheetname].columns) except EmptyDataError: # No Data, return an empty DataFrame diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index ebde1f954e0e8..4487050b23d7d 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -174,39 +174,6 @@ def _trim_excel_header(row): return row -def _maybe_convert_to_string(row): - """ - Convert elements in a row to string from Unicode. - - This is purely a Python 2.x patch and is performed ONLY when all - elements of the row are string-like. - - Parameters - ---------- - row : array-like - The row of data to convert. - - Returns - ------- - converted : array-like - """ - if compat.PY2: - converted = [] - - for i in range(len(row)): - if isinstance(row[i], compat.string_types): - try: - converted.append(str(row[i])) - except UnicodeEncodeError: - break - else: - break - else: - row = converted - - return row - - def _fill_mi_header(row, control_row): """Forward fill blank entries in row but only inside the same parent index. @@ -235,7 +202,7 @@ def _fill_mi_header(row, control_row): control_row[i] = False last = row[i] - return _maybe_convert_to_string(row), control_row + return row, control_row def _pop_header_name(row, index_col): diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 03495fa7faf0c..37617e5770417 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -1,12 +1,13 @@ from datetime import time from distutils.version import LooseVersion from io import UnsupportedOperation +from urllib.request import urlopen import numpy as np import pandas.compat as compat -from pandas.io.common import _is_url, _urlopen, get_filepath_or_buffer +from pandas.io.common import _is_url, get_filepath_or_buffer from pandas.io.excel._base import _BaseExcelReader @@ -35,7 +36,7 @@ def __init__(self, filepath_or_buffer): # If filepath_or_buffer is a url, want to keep the data as bytes so # can't pass to get_filepath_or_buffer() if _is_url(filepath_or_buffer): - filepath_or_buffer = _urlopen(filepath_or_buffer) + filepath_or_buffer = urlopen(filepath_or_buffer) elif not isinstance(filepath_or_buffer, (ExcelFile, xlrd.Book)): filepath_or_buffer, _, _, _ = get_filepath_or_buffer( filepath_or_buffer) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index c914de387413c..19c822e5dc270 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -2,7 +2,7 @@ Internal module for console introspection """ -from pandas.io.formats.terminal import get_terminal_size +from shutil import get_terminal_size def get_console_size(): diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 30ec801cee578..9f72bf80de744 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -17,8 +17,6 @@ ABCDatetimeIndex, ABCIndexClass, ABCMultiIndex, ABCPeriodIndex) from pandas.core.dtypes.missing import notna -from pandas import compat - from pandas.io.common import ( UnicodeWriter, _get_handle, _infer_compression, get_filepath_or_buffer) @@ -51,7 +49,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.index_label = index_label self.mode = mode if encoding is None: - encoding = 'ascii' if compat.PY2 else 'utf-8' + encoding = 'utf-8' self.encoding = encoding self.compression = _infer_compression(self.path_or_buf, compression) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b7694c4cb8592..dbe6b282ce9c0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -5,6 +5,7 @@ """ from functools import partial +from shutil import get_terminal_size import numpy as np @@ -32,7 +33,6 @@ from pandas.io.common import _expand_user, _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing -from pandas.io.formats.terminal import get_terminal_size # pylint: disable=W0141 @@ -689,7 +689,7 @@ def to_latex(self, column_format=None, longtable=False, encoding=None, multirow=multirow) if encoding is None: - encoding = 'ascii' if compat.PY2 else 'utf-8' + encoding = 'utf-8' if hasattr(self.buf, 'write'): latex_renderer.write_result(self.buf) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 6ba24a95032f8..eba56bd0e4d87 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -145,7 +145,7 @@ def render(self): self._write_table() if self.should_show_dimensions: - by = chr(215) + by = chr(215) # × self.write('

{rows} rows {by} {cols} columns

' .format(rows=len(self.frame), by=by, diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 90013148a9e0f..3d7e18b8fb0e3 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -206,7 +206,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): return compat.text_type(result) - if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): + if hasattr(thing, '__next__'): return compat.text_type(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): @@ -218,11 +218,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): quote_strings=quote_strings, max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: - if compat.PY3: - fmt = "'{thing}'" - else: - fmt = "u'{thing}'" - result = fmt.format(thing=as_escaped_unicode(thing)) + result = "'{thing}'".format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index d342fb20a447b..ae9a380272efa 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -1,50 +1,8 @@ """ -get_terminal_size() -- return width and height of terminal as a tuple - -code from: -http://stackoverflow.com/questions/566746/how-to-get-console- window-width-in- -python - -written by -Harco Kuppens (http://stackoverflow.com/users/825214/harco-kuppens) - -It is mentioned in the stackoverflow response that this code works -on linux, os x, windows and cygwin (windows). +Terminal utilities. """ -import os -import shutil -import subprocess - -from pandas.compat import PY3 -__all__ = ['get_terminal_size', 'is_terminal'] - - -def get_terminal_size(): - """ - Detect terminal size and return tuple = (width, height). - - Only to be used when running in a terminal. Note that the IPython notebook, - IPython zmq frontends, or IDLE do not run in a terminal, - """ - import platform - - if PY3: - return shutil.get_terminal_size() - - current_os = platform.system() - tuple_xy = None - if current_os == 'Windows': - tuple_xy = _get_terminal_size_windows() - if tuple_xy is None: - tuple_xy = _get_terminal_size_tput() - # needed for window's python in cygwin's xterm! - if (current_os == 'Linux' or current_os == 'Darwin' or - current_os.startswith('CYGWIN')): - tuple_xy = _get_terminal_size_linux() - if tuple_xy is None: - tuple_xy = (80, 25) # default value - return tuple_xy +__all__ = ['is_terminal'] def is_terminal(): @@ -62,89 +20,3 @@ def is_terminal(): return False else: # IPython in a terminal return True - - -def _get_terminal_size_windows(): - - try: - from ctypes import windll, create_string_buffer - - # stdin handle is -10 - # stdout handle is -11 - # stderr handle is -12 - - h = windll.kernel32.GetStdHandle(-12) - csbi = create_string_buffer(22) - res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi) - except (AttributeError, ValueError): - return None - if res: - import struct - (bufx, bufy, curx, cury, wattr, left, top, right, bottom, maxx, - maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw) - sizex = right - left + 1 - sizey = bottom - top + 1 - return sizex, sizey - else: - return None - - -def _get_terminal_size_tput(): - # get terminal width - # src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width - # -height-of-a-terminal-window - - try: - proc = subprocess.Popen(["tput", "cols"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - output_cols = proc.communicate(input=None) - proc = subprocess.Popen(["tput", "lines"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - output_rows = proc.communicate(input=None) - except OSError: - return None - - try: - # Some terminals (e.g. spyder) may report a terminal size of '', - # making the `int` fail. - - cols = int(output_cols[0]) - rows = int(output_rows[0]) - return cols, rows - except (ValueError, IndexError): - return None - - -def _get_terminal_size_linux(): - def ioctl_GWINSZ(fd): - try: - import fcntl - import termios - import struct - cr = struct.unpack( - 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) - except (struct.error, IOError): - return None - return cr - cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) - if not cr: - try: - fd = os.open(os.ctermid(), os.O_RDONLY) - cr = ioctl_GWINSZ(fd) - os.close(fd) - except OSError: - pass - if not cr or cr == (0, 0): - try: - from os import environ as env - cr = (env['LINES'], env['COLUMNS']) - except (ValueError, KeyError): - return None - return int(cr[1]), int(cr[0]) - - -if __name__ == "__main__": - sizex, sizey = get_terminal_size() - print('width = {w} height = {h}'.format(w=sizex, h=sizey)) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 4a71338bfc686..438f76f7333b3 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -207,12 +207,8 @@ def read(fh): # treat as a binary-like fh = None try: - # We can't distinguish between a path and a buffer of bytes in - # Python 2 so instead assume the first byte of a valid path is - # less than 0x80. - if compat.PY3 or ord(path_or_buf[0]) >= 0x80: - fh = compat.BytesIO(path_or_buf) - return read(fh) + fh = compat.BytesIO(path_or_buf) + return read(fh) finally: if fh is not None: fh.close() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6fba3981aa83c..767eb6e8f153e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -17,7 +17,7 @@ import pandas._libs.parsers as parsers from pandas._libs.tslibs import parsing import pandas.compat as compat -from pandas.compat import PY3, StringIO, lrange, lzip, string_types +from pandas.compat import StringIO, lrange, lzip, string_types from pandas.errors import ( AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender @@ -939,7 +939,7 @@ def _get_options_with_defaults(self, engine): def _check_file_or_buffer(self, f, engine): # see gh-16530 if is_file_like(f): - next_attr = "__next__" if PY3 else "next" + next_attr = "__next__" # The C engine doesn't need the file-like to have the "next" or # "__next__" attribute. However, the Python engine explicitly calls @@ -2224,8 +2224,7 @@ def __init__(self, f, **kwds): self.comment = kwds['comment'] self._comment_lines = [] - mode = 'r' if PY3 else 'rb' - f, handles = _get_handle(f, mode, encoding=self.encoding, + f, handles = _get_handle(f, 'r', encoding=self.encoding, compression=self.compression, memory_map=self.memory_map) self.handles.extend(handles) @@ -2377,12 +2376,10 @@ class MyDialect(csv.Dialect): else: def _read(): line = f.readline() - - if compat.PY2 and self.encoding: - line = line.decode(self.encoding) - pat = re.compile(sep) + yield pat.split(line.strip()) + for line in f: yield pat.split(line.strip()) reader = _read() @@ -3477,14 +3474,8 @@ def _get_col_names(colspec, columns): def _concat_date_cols(date_cols): if len(date_cols) == 1: - if compat.PY3: - return np.array([compat.text_type(x) for x in date_cols[0]], - dtype=object) - else: - return np.array([ - str(x) if not isinstance(x, compat.string_types) else x - for x in date_cols[0] - ], dtype=object) + return np.array([compat.text_type(x) for x in date_cols[0]], + dtype=object) rs = np.array([' '.join(compat.text_type(y) for y in x) for x in zip(*date_cols)], dtype=object) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index ab4a266853a78..6ea991a72319e 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -3,7 +3,7 @@ from numpy.lib.format import read_array -from pandas.compat import PY3, BytesIO, cPickle as pkl, pickle_compat as pc +from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc from pandas.io.common import _get_handle, _stringify_path @@ -153,9 +153,7 @@ def read_pickle(path, compression='infer'): try: return pc.load(f, encoding=None) except Exception: # noqa: E722 - if PY3: - return pc.load(f, encoding='latin1') - raise + return pc.load(f, encoding='latin1') finally: f.close() for _f in fh: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fadb9a5c6c7cb..cec2381b39617 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -19,7 +19,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas.compat import PY3, lrange, string_types +from pandas.compat import lrange, string_types from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import ( @@ -46,7 +46,6 @@ _version = '0.15.2' # encoding -# PY3 encoding if we don't specify _default_encoding = 'UTF-8' @@ -60,8 +59,8 @@ def _ensure_decoded(s): def _ensure_encoding(encoding): # set the encoding if we need if encoding is None: - if PY3: - encoding = _default_encoding + encoding = _default_encoding + return encoding @@ -4572,16 +4571,13 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, shape = data.shape data = np.asarray(data.ravel(), dtype=object) - # guard against a None encoding in PY3 (because of a legacy + # guard against a None encoding (because of a legacy # where the passed encoding is actually None) encoding = _ensure_encoding(encoding) if encoding is not None and len(data): itemsize = libwriters.max_len_string_array(ensure_object(data)) - if compat.PY3: - dtype = "U{0}".format(itemsize) - else: - dtype = "S{0}".format(itemsize) + dtype = "U{0}".format(itemsize) if isinstance(data[0], compat.binary_type): data = Series(data).str.decode(encoding, errors=errors).values diff --git a/pandas/io/s3.py b/pandas/io/s3.py index bf86db279df5a..6129f0dabe027 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,16 +1,11 @@ """ s3 support for remote file interactivity """ -from pandas import compat - try: import s3fs from botocore.exceptions import NoCredentialsError except ImportError: raise ImportError("The s3fs library is required to handle s3 files") -if compat.PY3: - from urllib.parse import urlparse as parse_url -else: - from urlparse import urlparse as parse_url +from urllib.parse import urlparse as parse_url def _strip_schema(url): diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 3c607d62b4286..58b0fa7501c97 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -449,9 +449,10 @@ def read(self, nrows=None): v[miss] = np.nan elif self.fields[j]['ntype'] == 'char': v = [y.rstrip() for y in vec] - if compat.PY3: - if self._encoding is not None: - v = [y.decode(self._encoding) for y in v] + + if self._encoding is not None: + v = [y.decode(self._encoding) for y in v] + df[x] = v if self._index is None: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index ec768d68a2ffd..c3d4db828a6f4 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -657,10 +657,7 @@ def _encode(self, s): """ Python 3 compatibility shim """ - if compat.PY3: - return s.encode(self._encoding) - else: - return s + return s.encode(self._encoding) def generate_value_label(self, byteorder, encoding): """ @@ -776,8 +773,8 @@ class StataMissingValue(StringMixin): MISSING_VALUES[value] = '.' if i > 0: MISSING_VALUES[value] += chr(96 + i) - int_value = struct.unpack('' or '<' + self.byteorder = struct.unpack( + 'b', self.path_or_buf.read(1))[0] == 0x1 and '>' or '<' self.filetype = struct.unpack('b', self.path_or_buf.read(1))[0] self.path_or_buf.read(1) # unused @@ -1255,7 +1252,7 @@ def _read_old_header(self, first_char): if tp in self.OLD_TYPE_MAPPING: typlist.append(self.OLD_TYPE_MAPPING[tp]) else: - typlist.append(tp - 127) # py2 string, py3 bytes + typlist.append(tp - 127) # bytes try: self.typlist = [self.TYPE_MAP[typ] for typ in typlist] @@ -2009,11 +2006,8 @@ def _write(self, to_write): """ Helper to call encode before writing to file for Python 3 compat. """ - if compat.PY3: - self._file.write(to_write.encode(self._encoding or - self._default_encoding)) - else: - self._file.write(to_write) + self._file.write(to_write.encode(self._encoding or + self._default_encoding)) def _prepare_categoricals(self, data): """Check for categorical columns, retain categorical information for @@ -2417,12 +2411,12 @@ def _write_data(self): def _null_terminate(self, s, as_string=False): null_byte = '\x00' - if compat.PY3 and not as_string: - s += null_byte - return s.encode(self._encoding) - else: - s += null_byte - return s + s += null_byte + + if not as_string: + s = s.encode(self._encoding) + + return s def _dtype_to_stata_type_117(dtype, column, force_strl): @@ -2467,19 +2461,12 @@ def _dtype_to_stata_type_117(dtype, column, force_strl): raise NotImplementedError("Data type %s not supported." % dtype) -def _bytes(s, encoding): - if compat.PY3: - return bytes(s, encoding) - else: - return bytes(s.encode(encoding)) - - def _pad_bytes_new(name, length): """ Takes a bytes instance and pads it with null bytes until it's length chars. """ if isinstance(name, string_types): - name = _bytes(name, 'utf-8') + name = bytes(name, 'utf-8') return name + b'\x00' * (length - len(name)) @@ -2599,12 +2586,7 @@ def _encode(self, s): """ Python 3 compatibility shim """ - if compat.PY3: - return s.encode(self._encoding) - else: - if isinstance(s, text_type): - return s.encode(self._encoding) - return s + return s.encode(self._encoding) def generate_blob(self, gso_table): """ @@ -2636,7 +2618,7 @@ def generate_blob(self, gso_table): # 3 u4 u8 u1 u4 string + null term bio = BytesIO() - gso = _bytes('GSO', 'ascii') + gso = bytes('GSO', 'ascii') gso_type = struct.pack(self._byteorder + 'B', 130) null = struct.pack(self._byteorder + 'B', 0) v_type = self._byteorder + self._gso_v_type @@ -2660,7 +2642,7 @@ def generate_blob(self, gso_table): bio.write(gso_type) # llll - utf8_string = _bytes(strl, 'utf-8') + utf8_string = bytes(strl, 'utf-8') bio.write(struct.pack(len_type, len(utf8_string) + 1)) # xxx...xxx @@ -2766,10 +2748,10 @@ def __init__(self, fname, data, convert_dates=None, write_index=True, @staticmethod def _tag(val, tag): """Surround val with """ - if isinstance(val, str) and compat.PY3: - val = _bytes(val, 'utf-8') - return (_bytes('<' + tag + '>', 'utf-8') + val + - _bytes('', 'utf-8')) + if isinstance(val, str): + val = bytes(val, 'utf-8') + return (bytes('<' + tag + '>', 'utf-8') + val + + bytes('', 'utf-8')) def _update_map(self, tag): """Update map location for tag with file position""" @@ -2778,10 +2760,10 @@ def _update_map(self, tag): def _write_header(self, data_label=None, time_stamp=None): """Write the file header""" byteorder = self._byteorder - self._file.write(_bytes('', 'utf-8')) + self._file.write(bytes('', 'utf-8')) bio = BytesIO() # ds_format - 117 - bio.write(self._tag(_bytes('117', 'utf-8'), 'release')) + bio.write(self._tag(bytes('117', 'utf-8'), 'release')) # byteorder bio.write(self._tag(byteorder == ">" and "MSF" or "LSF", 'byteorder')) # number of vars, 2 bytes @@ -2792,7 +2774,7 @@ def _write_header(self, data_label=None, time_stamp=None): # data label 81 bytes, char, null terminated label = data_label[:80] if data_label is not None else '' label_len = struct.pack(byteorder + "B", len(label)) - label = label_len + _bytes(label, 'utf-8') + label = label_len + bytes(label, 'utf-8') bio.write(self._tag(label, 'label')) # time stamp, 18 bytes, char, null terminated # format dd Mon yyyy hh:mm @@ -2808,7 +2790,7 @@ def _write_header(self, data_label=None, time_stamp=None): month_lookup[time_stamp.month] + time_stamp.strftime(" %Y %H:%M")) # '\x11' added due to inspection of Stata file - ts = b'\x11' + _bytes(ts, 'utf8') + ts = b'\x11' + bytes(ts, 'utf8') bio.write(self._tag(ts, 'timestamp')) bio.seek(0) self._file.write(self._tag(bio.read(), 'header')) @@ -2948,7 +2930,7 @@ def _write_value_labels(self): def _write_file_close_tag(self): self._update_map('stata_data_close') - self._file.write(_bytes('', 'utf-8')) + self._file.write(bytes('', 'utf-8')) self._update_map('end-of-file') def _update_strl_names(self): diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index a633ae670eed5..450656efa389d 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -1,11 +1,7 @@ -import subprocess # noqa: F401 - import pytest from pandas._config import detect_console_encoding -from pandas.io.formats.terminal import _get_terminal_size_tput - class MockEncoding(object): # TODO(py27): replace with mock """ @@ -76,18 +72,3 @@ def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale): context.setattr('sys.stdout', MockEncoding(std)) context.setattr('sys.getdefaultencoding', lambda: 'sysDefaultEncoding') assert detect_console_encoding() == 'sysDefaultEncoding' - - -@pytest.mark.parametrize("size", ['', ['']]) -def test_terminal_unknown_dimensions(monkeypatch, size, mocker): - - def communicate(*args, **kwargs): - return size - - monkeypatch.setattr('subprocess.Popen', mocker.Mock()) - monkeypatch.setattr('subprocess.Popen.return_value.returncode', None) - monkeypatch.setattr( - 'subprocess.Popen.return_value.communicate', communicate) - result = _get_terminal_size_tput() - - assert result is None diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1966e7bb8cc8e..002e72c44afdd 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -9,6 +9,7 @@ from operator import methodcaller import os import re +from shutil import get_terminal_size import sys import textwrap import warnings @@ -30,7 +31,6 @@ import pandas.io.formats.format as fmt import pandas.io.formats.printing as printing -from pandas.io.formats.terminal import get_terminal_size use_32bit_repr = is_platform_windows() or is_platform_32bit() @@ -306,8 +306,6 @@ def test_repr_truncates_terminal_size(self, monkeypatch): # see gh-21180 terminal_size = (118, 96) - monkeypatch.setattr('pandas.io.formats.console.get_terminal_size', - lambda: terminal_size) monkeypatch.setattr('pandas.io.formats.format.get_terminal_size', lambda: terminal_size) @@ -336,8 +334,7 @@ def test_repr_truncates_terminal_size_full(self, monkeypatch): # GH 22984 ensure entire window is filled terminal_size = (80, 24) df = pd.DataFrame(np.random.rand(1, 7)) - monkeypatch.setattr('pandas.io.formats.console.get_terminal_size', - lambda: terminal_size) + monkeypatch.setattr('pandas.io.formats.format.get_terminal_size', lambda: terminal_size) assert "..." not in str(df)