
COMPAT: reading json with lines=True from s3, xref #17200 #17201

Merged: 22 commits, Nov 27, 2017
Changes from 20 commits

1 change: 1 addition & 0 deletions ci/requirements-3.5.pip
@@ -1,2 +1,3 @@
xarray==0.9.1
pandas-gbq

Contributor: revert

Contributor: this needs to be reverted

moto
1 change: 1 addition & 0 deletions ci/requirements_all.txt
@@ -26,3 +26,4 @@ sqlalchemy
bottleneck
pymysql

Contributor: this is ok

Jinja2
s3fs
2 changes: 1 addition & 1 deletion ci/requirements_dev.txt
@@ -5,4 +5,4 @@ cython
pytest>=3.1.0
pytest-cov
flake8

Contributor: revert this, s3fs is NOT a requirement for dev; we should be robust to not having this installed

moto
s3fs
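
To illustrate the reviewer's point about being robust when s3fs is not installed, here is a minimal sketch (not part of this diff) of the pytest.importorskip pattern used for optional dependencies; the test name and body are hypothetical:

import pytest

# importorskip returns the module if it imports successfully; otherwise it
# raises a skip exception, so the test is skipped rather than failed.
s3fs = pytest.importorskip('s3fs')


def test_something_that_needs_s3fs():
    # hypothetical test body, for illustration only
    assert s3fs is not None
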
8 changes: 8 additions & 0 deletions pandas/io/json/json.py
@@ -341,12 +341,20 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
            json = filepath_or_buffer
    elif hasattr(filepath_or_buffer, 'read'):
        json = filepath_or_buffer.read()
    else:
        json = filepath_or_buffer

    if lines:
        # If given a json lines file, we break the string into lines, add
        # commas and put it in a json list to make a valid json object.

        """
        If PY3 and/or isinstance(json, bytes)
        """

Contributor: just a 1-line comment is fine;

if PY3 and isinstance(json, bytes):
    ...

        if isinstance(json, bytes):

Contributor: 1 line comment

            json = json.decode('utf-8')

        lines = list(StringIO(json.strip()))
        json = '[' + ','.join(lines) + ']'

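As a rough standalone sketch of what this hunk does (plain Python only, pandas internals and error handling omitted): decode a bytes payload on Python 3, split the JSON-lines text on newlines, and join the records into one JSON array string. The sample payload below is illustrative.

from io import StringIO

raw = b'{"a": 1, "b": 2}\n{"b": 2, "a": 1}\n'  # e.g. bytes read via s3fs on PY3

# Bytes must be decoded to str before line splitting.
if isinstance(raw, bytes):
    raw = raw.decode('utf-8')

# Break the payload into lines, join them with commas and wrap in brackets
# so the result is a single valid JSON array.
records = list(StringIO(raw.strip()))
combined = '[' + ','.join(records) + ']'
# combined == '[{"a": 1, "b": 2}\n,{"b": 2, "a": 1}]'
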
74 changes: 74 additions & 0 deletions pandas/tests/io/conftest.py
@@ -0,0 +1,74 @@
import os

import moto
import pytest
from pandas.io.parsers import read_table

HERE = os.path.dirname(__file__)


@pytest.fixture(scope='module')
def tips_file():
"""Path to the tips dataset"""
return os.path.join(HERE, 'parser', 'data', 'tips.csv')


@pytest.fixture(scope='module')
def jsonl_file():
"""Path a JSONL dataset"""
return os.path.join(HERE, 'parser', 'data', 'items.jsonl')


@pytest.fixture(scope='module')
def salaries_table():
"""DataFrame with the salaries dataset"""
path = os.path.join(HERE, 'parser', 'data', 'salaries.csv')
return read_table(path)


@pytest.fixture(scope='module')
def s3_resource(tips_file, jsonl_file):
"""Fixture for mocking S3 interaction.

The primary bucket name is "pandas-test". The following datasets
are loaded.

- tips.csv
- tips.csv.gz
- tips.csv.bz2
- items.jsonl

A private bucket "cant_get_it" is also created. The boto3 s3 resource
is yielded by the fixture.
"""
pytest.importorskip('s3fs')
moto.mock_s3().start()

test_s3_files = [
('tips.csv', tips_file),
('tips.csv.gz', tips_file + '.gz'),
('tips.csv.bz2', tips_file + '.bz2'),
('items.jsonl', jsonl_file),
]

def add_tips_files(bucket_name):
for s3_key, file_name in test_s3_files:
with open(file_name, 'rb') as f:
conn.Bucket(bucket_name).put_object(
Key=s3_key,
Body=f)

boto3 = pytest.importorskip('boto3')
# see gh-16135
bucket = 'pandas-test'

conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket=bucket)
add_tips_files(bucket)

conn.create_bucket(Bucket='cant_get_it', ACL='private')
add_tips_files('cant_get_it')

yield conn

moto.mock_s3().stop()
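
For context, a hypothetical test consuming this fixture (not part of this diff) simply requests it by name; pytest injects it and moto serves the bucket from memory, so no network access or AWS credentials are needed. The test name and assertion are illustrative only, and s3fs must be installed.

from pandas.io.parsers import read_csv


def test_read_tips_from_mock_s3(s3_resource):
    # The "pandas-test" bucket was populated by the fixture above.
    df = read_csv('s3://pandas-test/tips.csv')
    assert len(df) > 0
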
20 changes: 18 additions & 2 deletions pandas/tests/io/json/test_pandas.py
@@ -4,7 +4,6 @@
from pandas.compat import (range, lrange, StringIO,
                           OrderedDict, is_platform_32bit)
import os

import numpy as np
from pandas import (Series, DataFrame, DatetimeIndex, Timestamp,
                    read_json, compat)
@@ -985,12 +984,29 @@ def test_tz_range_is_utc(self):
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

    def test_read_jsonl(self):
    def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_s3_jsonl(self, s3_resource):
        pytest.importorskip('s3fs')
        # GH17200

Contributor: Add a pytest.importorskip() here I think.

        result = read_json('s3n://pandas-test/items.jsonl', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with ensure_clean('tmp_items.json') as path:
            with open(path, 'w') as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
            assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK
2 changes: 2 additions & 0 deletions pandas/tests/io/parser/data/items.jsonl
@@ -0,0 +1,2 @@
{"a": 1, "b": 2}

Contributor: what is the purpose of this file?

Contributor: I see, ok you have to have this named .json otherwise it won't be picked up by setup.py (IOW the install test will fail).

{"b":2, "a" :1}
48 changes: 0 additions & 48 deletions pandas/tests/io/parser/test_network.py
@@ -4,62 +4,14 @@
Tests parsers ability to read and parse non-local files
and hence require a network connection to be read.
"""
import os

import pytest
import moto

import pandas.util.testing as tm
from pandas import DataFrame
from pandas.io.parsers import read_csv, read_table
from pandas.compat import BytesIO


@pytest.fixture(scope='module')
def tips_file():
    return os.path.join(tm.get_data_path(), 'tips.csv')

Contributor: cool


@pytest.fixture(scope='module')
def salaries_table():
    path = os.path.join(tm.get_data_path(), 'salaries.csv')
    return read_table(path)


@pytest.fixture(scope='module')
def s3_resource(tips_file):
    pytest.importorskip('s3fs')
    moto.mock_s3().start()

    test_s3_files = [
        ('tips.csv', tips_file),
        ('tips.csv.gz', tips_file + '.gz'),
        ('tips.csv.bz2', tips_file + '.bz2'),
    ]

    def add_tips_files(bucket_name):
        for s3_key, file_name in test_s3_files:
            with open(file_name, 'rb') as f:
                conn.Bucket(bucket_name).put_object(
                    Key=s3_key,
                    Body=f)

    boto3 = pytest.importorskip('boto3')
    # see gh-16135
    bucket = 'pandas-test'

    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)
    add_tips_files(bucket)

    conn.create_bucket(Bucket='cant_get_it', ACL='private')
    add_tips_files('cant_get_it')

    yield conn

    moto.mock_s3().stop()


@pytest.mark.network
@pytest.mark.parametrize(
"compression,extension",