From 0d3dcbbbde48f794b5536dda7381495f18a78bb4 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Thu, 10 Sep 2015 14:57:50 +0200 Subject: [PATCH] added capability to handle Path/LocalPath objects --- ci/requirements-2.7.pip | 2 ++ doc/source/conf.py | 5 +++-- doc/source/io.rst | 5 +++-- doc/source/whatsnew/v0.17.1.txt | 4 ++++ pandas/io/common.py | 40 +++++++++++++++++++++++++++++++-- pandas/io/tests/test_common.py | 26 +++++++++++++++++++++ pandas/util/testing.py | 17 ++++++++++++++ 7 files changed, 93 insertions(+), 6 deletions(-) diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index 644457d69b37f..9bc533110cea3 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -2,3 +2,5 @@ blosc httplib2 google-api-python-client == 1.2 python-gflags == 2.0 +pathlib +py diff --git a/doc/source/conf.py b/doc/source/conf.py index f2a033eb82d9c..23095b7f4d24b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -299,8 +299,9 @@ intersphinx_mapping = { 'statsmodels': ('http://statsmodels.sourceforge.net/devel/', None), 'matplotlib': ('http://matplotlib.org/', None), - 'python': ('http://docs.python.org/', None), - 'numpy': ('http://docs.scipy.org/doc/numpy', None) + 'python': ('http://docs.python.org/3', None), + 'numpy': ('http://docs.scipy.org/doc/numpy', None), + 'py': ('http://pylib.readthedocs.org/en/latest/', None) } import glob autosummary_generate = glob.glob("*.rst") diff --git a/doc/source/io.rst b/doc/source/io.rst index 9def8be621aed..44eb8e33e5ddf 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -79,9 +79,10 @@ for some advanced strategies They can take a number of arguments: - - ``filepath_or_buffer``: Either a string path to a file, URL + - ``filepath_or_buffer``: Either a path to a file (a :class:`python:str`, + :class:`python:pathlib.Path`, or :class:`py:py._path.local.LocalPath`), URL (including http, ftp, and S3 locations), or any object with a ``read`` - method (such as an open file or ``StringIO``). 
+ method (such as an open file or :class:`~python:io.StringIO`). - ``sep`` or ``delimiter``: A delimiter / separator to split fields on. With ``sep=None``, ``read_csv`` will try to infer the delimiter automatically in some cases by "sniffing". diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 736554672a089..ae0004d9eba08 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -23,6 +23,10 @@ Enhancements Other Enhancements ^^^^^^^^^^^^^^^^^^ +- :func:`~pandas.io.parsers.read_csv` and :func:`~pandas.io.parsers.read_table` now + also accept :class:`python:pathlib.Path`, or :class:`py:py._path.local.LocalPath` + for the ``filepath_or_buffer`` argument. (:issue:`11051`) + .. _whatsnew_0171.api: API changes diff --git a/pandas/io/common.py b/pandas/io/common.py index b9cdd44e52555..98c93ded5d3e8 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -5,10 +5,24 @@ import zipfile from contextlib import contextmanager, closing -from pandas.compat import StringIO, string_types, BytesIO +from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat +try: + import pathlib + _PATHLIB_INSTALLED = True +except ImportError: + _PATHLIB_INSTALLED = False + + +try: + from py.path import local as LocalPath + _PY_PATH_INSTALLED = True +except ImportError: + _PY_PATH_INSTALLED = False + + if compat.PY3: from urllib.request import urlopen, pathname2url _urlopen = urlopen @@ -201,6 +215,25 @@ def _validate_header_arg(header): "header=int or list-like of ints to specify " "the row(s) making up the column names") +def _stringify_path(filepath_or_buffer): + """Return the argument coerced to a string if it was a pathlib.Path + or a py.path.local + + Parameters + ---------- + filepath_or_buffer : object to be converted + + Returns + ------- + str_filepath_or_buffer : the string version of the input path + """ + if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): + return 
text_type(filepath_or_buffer) + if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath): + return filepath_or_buffer.strpath + return filepath_or_buffer + + def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None): """ @@ -209,7 +242,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, Parameters ---------- - filepath_or_buffer : a url, filepath, or buffer + filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), + or buffer encoding : the encoding to use to decode py3 bytes, default is 'utf-8' Returns @@ -257,6 +291,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, filepath_or_buffer = k return filepath_or_buffer, None, compression + # It is a pathlib.Path/py.path.local or string + filepath_or_buffer = _stringify_path(filepath_or_buffer) return _expand_user(filepath_or_buffer), None, compression diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index 03d1e4fb1f365..003068a702246 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -5,10 +5,20 @@ import os from os.path import isabs +import nose import pandas.util.testing as tm from pandas.io import common +try: + from pathlib import Path +except ImportError: + pass + +try: + from py.path import local as LocalPath +except ImportError: + pass class TestCommonIOCapabilities(tm.TestCase): @@ -27,6 +37,22 @@ def test_expand_user_normal_path(self): self.assertEqual(expanded_name, filename) self.assertEqual(os.path.expanduser(filename), expanded_name) + def test_stringify_path_pathlib(self): + tm._skip_if_no_pathlib() + + rel_path = common._stringify_path(Path('.')) + self.assertEqual(rel_path, '.') + redundant_path = common._stringify_path(Path('foo//bar')) + self.assertEqual(redundant_path, os.path.join('foo', 'bar')) + + def test_stringify_path_localpath(self): + tm._skip_if_no_localpath() + + path = 'foo/bar' + abs_path = os.path.abspath(path) + lpath = LocalPath(path) + 
self.assertEqual(common._stringify_path(lpath), abs_path) + def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 362351c7c31c2..df3f1aaa815fa 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -255,6 +255,23 @@ def _skip_if_python26(): import nose raise nose.SkipTest("skipping on python2.6") + +def _skip_if_no_pathlib(): + try: + from pathlib import Path + except ImportError: + import nose + raise nose.SkipTest("pathlib not available") + + +def _skip_if_no_localpath(): + try: + from py.path import local as LocalPath + except ImportError: + import nose + raise nose.SkipTest("py.path not installed") + + def _incompat_bottleneck_version(method): """ skip if we have bottleneck installed and its >= 1.0