From e9219936e74d6f17eff10a8577e1263a52d18bb4 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Mon, 15 Jun 2020 16:02:04 -0500
Subject: [PATCH 1/8] BUG: reading line-format JSON from file url #27135

---
 pandas/io/json/_json.py                       |  2 ++
 pandas/tests/io/json/data/line_delimited.json |  4 ++++
 pandas/tests/io/json/test_readlines.py        | 12 ++++++++++++
 3 files changed, 18 insertions(+)
 create mode 100644 pandas/tests/io/json/data/line_delimited.json

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index b973553a767ba..900d6cf8fdc29 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -690,6 +690,8 @@ def _preprocess_data(self, data):
         if hasattr(data, "read") and (not self.chunksize or not self.nrows):
             data = data.read()
         if not hasattr(data, "read") and (self.chunksize or self.nrows):
+            if isinstance(data, bytes):
+                data = data.decode()
             data = StringIO(data)
 
         return data
diff --git a/pandas/tests/io/json/data/line_delimited.json b/pandas/tests/io/json/data/line_delimited.json
new file mode 100644
index 0000000000000..108f8c7c5fba6
--- /dev/null
+++ b/pandas/tests/io/json/data/line_delimited.json
@@ -0,0 +1,4 @@
+
+    {"a": 1, "b": 2}
+    {"a": 3, "b": 4}
+    {"a": 5, "b": 6}
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index 53462eaaada8d..43dab79f0aa3d 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -219,3 +219,15 @@ def test_readjson_nrows_requires_lines():
     msg = "nrows can only be passed if lines=True"
     with pytest.raises(ValueError, match=msg):
         pd.read_json(jsonl, lines=False, nrows=2)
+
+
+def test_readjson_lines_chunks_fileurl(datapath):
+    # GH 27135
+    # Test reading line-format JSON from file url
+    os_path = datapath("io", "json", "data", "line_delimited.json")
+    file_url = "file://localhost" + os_path
+    path_reader = pd.read_json(os_path, lines=True, chunksize=1)
+    df_list = list(path_reader)
+    url_reader = pd.read_json(file_url, lines=True, chunksize=1)
+    for index, chuck in enumerate(url_reader):
+        tm.assert_frame_equal(chuck, df_list[index])

From 08e438346e9c6e2bf7686b950d3141422d360a30 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Mon, 15 Jun 2020 17:34:15 -0500
Subject: [PATCH 2/8] BUG: move type check to _get_data_from_filepath #27135

---
 pandas/io/json/_json.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 900d6cf8fdc29..3371473359857 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -1,6 +1,6 @@
 from collections import abc
 import functools
-from io import StringIO
+from io import StringIO, BytesIO
 from itertools import islice
 import os
 from typing import Any, Callable, Optional, Type
@@ -690,8 +690,6 @@ def _preprocess_data(self, data):
         if hasattr(data, "read") and (not self.chunksize or not self.nrows):
             data = data.read()
         if not hasattr(data, "read") and (self.chunksize or self.nrows):
-            if isinstance(data, bytes):
-                data = data.decode()
             data = StringIO(data)
 
         return data
@@ -726,6 +724,9 @@ def _get_data_from_filepath(self, filepath_or_buffer):
             self.should_close = True
             self.open_stream = data
 
+        if isinstance(data, BytesIO):
+            data = data.getvalue().decode()
+
         return data
 
     def _combine_lines(self, lines) -> str:

From 93a46db3e435366fdce5c486d0e5f7048f09fc86 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Mon, 15 Jun 2020 19:34:32 -0500
Subject: [PATCH 3/8] BUG: sort import and file url for windows #27135

---
 pandas/io/json/_json.py                | 2 +-
 pandas/tests/io/json/test_readlines.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 3371473359857..ff37c36962aec 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -1,6 +1,6 @@
 from collections import abc
 import functools
-from io import StringIO, BytesIO
+from io import BytesIO, StringIO
 from itertools import islice
 import os
 from typing import Any, Callable, Optional, Type
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index 43dab79f0aa3d..913e7c97600a3 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -1,4 +1,6 @@
 from io import StringIO
+from pathlib import PureWindowsPath
+from platform import system
 
 import pytest
 
@@ -226,6 +228,8 @@ def test_readjson_lines_chunks_fileurl(datapath):
     # Test reading line-format JSON from file url
     os_path = datapath("io", "json", "data", "line_delimited.json")
     file_url = "file://localhost" + os_path
+    if system() == "Windows":
+        file_url = PureWindowsPath(file_url)
     path_reader = pd.read_json(os_path, lines=True, chunksize=1)
     df_list = list(path_reader)
     url_reader = pd.read_json(file_url, lines=True, chunksize=1)

From 3b28e5ee578374c9a3a62500c7cd3cd48d60571f Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Tue, 16 Jun 2020 11:51:48 -0500
Subject: [PATCH 4/8] BUG: explicit test case #27135

---
 pandas/tests/io/json/test_readlines.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index 913e7c97600a3..2d58f98c1caaa 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -226,12 +226,15 @@ def test_readjson_nrows_requires_lines():
 def test_readjson_lines_chunks_fileurl(datapath):
     # GH 27135
     # Test reading line-format JSON from file url
+    df_list_expected = [
+        pd.DataFrame([[1, 2]], columns=["a", "b"], index=[0]),
+        pd.DataFrame([[3, 4]], columns=["a", "b"], index=[1]),
+        pd.DataFrame([[5, 6]], columns=["a", "b"], index=[2]),
+    ]
     os_path = datapath("io", "json", "data", "line_delimited.json")
     file_url = "file://localhost" + os_path
     if system() == "Windows":
         file_url = PureWindowsPath(file_url)
-    path_reader = pd.read_json(os_path, lines=True, chunksize=1)
-    df_list = list(path_reader)
     url_reader = pd.read_json(file_url, lines=True, chunksize=1)
     for index, chuck in enumerate(url_reader):
-        tm.assert_frame_equal(chuck, df_list[index])
+        tm.assert_frame_equal(chuck, df_list_expected[index])

From b34bc15f613cf036e52285409eb4dacca78ef9a0 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Tue, 16 Jun 2020 11:53:08 -0500
Subject: [PATCH 5/8] BUG: update json file #27135

---
 pandas/tests/io/json/data/line_delimited.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/json/data/line_delimited.json b/pandas/tests/io/json/data/line_delimited.json
index 108f8c7c5fba6..be84245329583 100644
--- a/pandas/tests/io/json/data/line_delimited.json
+++ b/pandas/tests/io/json/data/line_delimited.json
@@ -1,4 +1,3 @@
-
     {"a": 1, "b": 2}
     {"a": 3, "b": 4}
     {"a": 5, "b": 6}

From d7b06d5c1f1a81f5204ea3f2ff2a7f34cea472ab Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Tue, 16 Jun 2020 14:30:03 -0500
Subject: [PATCH 6/8] BUG: construct path #27135

---
 pandas/tests/io/json/test_readlines.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index 2d58f98c1caaa..b475fa2c514ff 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -1,6 +1,5 @@
 from io import StringIO
-from pathlib import PureWindowsPath
-from platform import system
+from pathlib import Path
 
 import pytest
 
@@ -232,9 +231,7 @@ def test_readjson_lines_chunks_fileurl(datapath):
         pd.DataFrame([[5, 6]], columns=["a", "b"], index=[2]),
     ]
     os_path = datapath("io", "json", "data", "line_delimited.json")
-    file_url = "file://localhost" + os_path
-    if system() == "Windows":
-        file_url = PureWindowsPath(file_url)
+    file_url = Path(os_path).as_uri()
     url_reader = pd.read_json(file_url, lines=True, chunksize=1)
     for index, chuck in enumerate(url_reader):
         tm.assert_frame_equal(chuck, df_list_expected[index])

From f58c4267d37d88d05bfbe20b8cb835815d314854 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Wed, 17 Jun 2020 16:55:44 -0500
Subject: [PATCH 7/8] add entry to whatsnew doc 1.1.0 (#27135)

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 10522ff797c59..27a7ffa2b3b4e 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -988,6 +988,7 @@ I/O
 - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
 - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. (:issue:`31544`)
 - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
+- :meth:`read_json` can now read line-delimited JSON from a file URL (:issue:`27135`)
 
 Plotting
 ^^^^^^^^

From dab1d401a1cd56d46d228c6bc85d909c13fd09d5 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Sat, 27 Jun 2020 01:54:00 -0500
Subject: [PATCH 8/8] whatsnew 1.1 (#27135)

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index c5eb2febe8ae9..70c45acec9f35 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1040,6 +1040,7 @@ I/O
 - Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
 - Bug in :meth:`ujson.encode` was raising an `OverflowError` with numbers larger than sys.maxsize (:issue: `34395`)
 - Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`)
+- :meth:`read_json` can now read a line-delimited JSON file from a file URL when ``lines`` and ``chunksize`` are set (:issue:`27135`)
 
 Plotting
 ^^^^^^^^