|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
15 | 15 | import json
|
| 16 | +import math |
16 | 17 |
|
17 | 18 | import numpy as np
|
18 | 19 | import pandas as pd
|
| 20 | +import pyarrow as pa |
19 | 21 | import pytest
|
20 | 22 |
|
21 | 23 | import db_dtypes
|
|
36 | 38 | "null_field": None,
|
37 | 39 | "order": {
|
38 | 40 | "items": ["book", "pen", "computer"],
|
39 |
| - "total": 15.99, |
| 41 | + "total": 15, |
40 | 42 | "address": {"street": "123 Main St", "city": "Anytown"},
|
41 | 43 | },
|
42 | 44 | },
|
@@ -114,3 +116,122 @@ def test_as_numpy_array():
|
114 | 116 | ]
|
115 | 117 | )
|
116 | 118 | pd._testing.assert_equal(result, expected)
|
| 119 | + |
| 120 | + |
def test_json_arrow_array():
    """A JSONArray built from JSON_DATA converts to a pyarrow ExtensionArray."""
    json_array = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
    arrow_result = json_array.__arrow_array__()
    assert isinstance(arrow_result, pa.ExtensionArray)
| 124 | + |
| 125 | + |
def test_json_arrow_storage_type():
    """JSONArrowType reports the "dbjson" extension name and a string storage type."""
    json_type = db_dtypes.JSONArrowType()
    # Check the extension name first, then the underlying Arrow storage.
    assert json_type.extension_name == "dbjson"
    storage = json_type.storage_type
    assert pa.types.is_string(storage)
| 130 | + |
| 131 | + |
def test_json_arrow_constructors():
    """wrap_array and ExtensionArray.from_storage produce equal extension arrays."""
    # Serialize every sample value to compact JSON text with sorted keys.
    serialized = []
    for value in JSON_DATA.values():
        serialized.append(json.dumps(value, sort_keys=True, separators=(",", ":")))
    string_storage = pa.array(serialized, type=pa.string())

    # Route 1: wrap the storage array through the extension type itself.
    wrapped = db_dtypes.JSONArrowType().wrap_array(string_storage)
    assert isinstance(wrapped, pa.ExtensionArray)

    # Route 2: build the extension array from storage via the class method.
    from_storage = pa.ExtensionArray.from_storage(
        db_dtypes.JSONArrowType(), string_storage
    )
    assert isinstance(from_storage, pa.ExtensionArray)

    assert wrapped == from_storage
| 146 | + |
| 147 | + |
def test_json_arrow_to_pandas():
    """to_pandas on a JSONArrowType array yields a JSONDtype result with decoded values."""
    expected_mapping = {
        "null_field": None,
        "order": {
            "items": ["book", "pen", "computer"],
            "total": 15,
            "address": {"street": "123 Main St", "city": "Anytown"},
        },
    }

    def _dump(value):
        # Compact JSON text with sorted keys, one string per sample value.
        return json.dumps(value, sort_keys=True, separators=(",", ":"))

    encoded = [_dump(value) for value in JSON_DATA.values()]
    arrow_array = pa.array(encoded, type=db_dtypes.JSONArrowType())

    s = arrow_array.to_pandas()
    assert isinstance(s.dtypes, db_dtypes.JSONDtype)
    assert s[0]
    assert s[1] == 100
    assert math.isclose(s[2], 0.98)
    assert s[3] == "hello world"
    assert math.isclose(s[4][0], 0.1)
    assert math.isclose(s[4][1], 0.2)
    assert s[5] == expected_mapping
    assert pd.isna(s[6])
| 172 | + |
| 173 | + |
def test_json_arrow_to_pylist():
    """to_pylist on a JSONArrowType array returns a list of decoded Python values."""
    expected_mapping = {
        "null_field": None,
        "order": {
            "items": ["book", "pen", "computer"],
            "total": 15,
            "address": {"street": "123 Main St", "city": "Anytown"},
        },
    }

    # Compact JSON text with sorted keys, one string per sample value.
    encoded = []
    for value in JSON_DATA.values():
        encoded.append(json.dumps(value, sort_keys=True, separators=(",", ":")))
    arrow_array = pa.array(encoded, type=db_dtypes.JSONArrowType())

    values = arrow_array.to_pylist()
    assert isinstance(values, list)
    assert values[0]
    assert values[1] == 100
    assert math.isclose(values[2], 0.98)
    assert values[3] == "hello world"
    assert math.isclose(values[4][0], 0.1)
    assert math.isclose(values[4][1], 0.2)
    assert values[5] == expected_mapping
    # The last sample is expected to come back as a plain None.
    assert values[6] is None
| 198 | + |
| 199 | + |
def test_json_arrow_record_batch():
    """A JSONArrowType column keeps its type and values across an IPC stream write/read."""
    expected_mapping = {
        "null_field": None,
        "order": {
            "items": ["book", "pen", "computer"],
            "total": 15,
            "address": {"street": "123 Main St", "city": "Anytown"},
        },
    }

    def _dump(value):
        # Compact JSON text with sorted keys, one string per sample value.
        return json.dumps(value, sort_keys=True, separators=(",", ":"))

    encoded = [_dump(value) for value in JSON_DATA.values()]
    arrow_array = pa.array(encoded, type=db_dtypes.JSONArrowType())
    record_batch = pa.RecordBatch.from_arrays([arrow_array], ["json_col"])

    # Write the single batch into an in-memory IPC stream.
    out_stream = pa.BufferOutputStream()
    with pa.RecordBatchStreamWriter(out_stream, record_batch.schema) as writer:
        writer.write_batch(record_batch)
    stream_buffer = out_stream.getvalue()

    # Read the stream back and pull out the JSON column.
    with pa.ipc.open_stream(stream_buffer) as reader:
        table = reader.read_all()
    json_col = table.column("json_col")
    assert isinstance(json_col.type, db_dtypes.JSONArrowType)

    values = json_col.to_pylist()
    assert isinstance(values, list)
    assert values[0]
    assert values[1] == 100
    assert math.isclose(values[2], 0.98)
    assert values[3] == "hello world"
    assert math.isclose(values[4][0], 0.1)
    assert math.isclose(values[4][1], 0.2)
    assert values[5] == expected_mapping
    assert values[6] is None
0 commit comments