|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
15 | 15 | import json
|
| 16 | +import math |
16 | 17 |
|
17 | 18 | import numpy as np
|
18 | 19 | import pandas as pd
|
|
37 | 38 | "null_field": None,
|
38 | 39 | "order": {
|
39 | 40 | "items": ["book", "pen", "computer"],
|
40 |
| - "total": 15.99, |
| 41 | + "total": 15, |
41 | 42 | "address": {"street": "123 Main St", "city": "Anytown"},
|
42 | 43 | },
|
43 | 44 | },
|
@@ -117,35 +118,115 @@ def test_as_numpy_array():
|
117 | 118 | pd._testing.assert_equal(result, expected)
|
118 | 119 |
|
119 | 120 |
|
120 |
| -def test_arrow_json_storage_type(): |
121 |
| - arrow_json_type = db_dtypes.ArrowJSONType() |
| 121 | +def test_json_arrow_storage_type(): |
| 122 | + arrow_json_type = db_dtypes.JSONArrowType() |
122 | 123 | assert arrow_json_type.extension_name == "dbjson"
|
123 | 124 | assert pa.types.is_string(arrow_json_type.storage_type)
|
124 | 125 |
|
125 | 126 |
|
126 |
| -def test_arrow_json_constructors(): |
127 |
| - storage_array = pa.array( |
128 |
| - ["0", "str", '{"b": 2}', '{"a": [1, 2, 3]}'], type=pa.string() |
129 |
| - ) |
130 |
| - arr_1 = db_dtypes.ArrowJSONType().wrap_array(storage_array) |
| 127 | +def test_json_arrow_constructors(): |
| 128 | + data = [ |
| 129 | + json.dumps(value, sort_keys=True, separators=(",", ":")) |
| 130 | + for value in JSON_DATA.values() |
| 131 | + ] |
| 132 | + storage_array = pa.array(data, type=pa.string()) |
| 133 | + |
| 134 | + arr_1 = db_dtypes.JSONArrowType().wrap_array(storage_array) |
131 | 135 | assert isinstance(arr_1, pa.ExtensionArray)
|
132 | 136 |
|
133 |
| - arr_2 = pa.ExtensionArray.from_storage(db_dtypes.ArrowJSONType(), storage_array) |
| 137 | + arr_2 = pa.ExtensionArray.from_storage(db_dtypes.JSONArrowType(), storage_array) |
134 | 138 | assert isinstance(arr_2, pa.ExtensionArray)
|
135 | 139 |
|
136 | 140 | assert arr_1 == arr_2
|
137 | 141 |
|
138 | 142 |
|
139 |
| -def test_arrow_json_to_pandas(): |
140 |
| - storage_array = pa.array( |
141 |
| - [None, "0", "str", '{"b": 2}', '{"a": [1, 2, 3]}'], type=pa.string() |
142 |
| - ) |
143 |
| - arr = db_dtypes.ArrowJSONType().wrap_array(storage_array) |
| 143 | +def test_json_arrow_to_pandas(): |
| 144 | + data = [ |
| 145 | + json.dumps(value, sort_keys=True, separators=(",", ":")) |
| 146 | + for value in JSON_DATA.values() |
| 147 | + ] |
| 148 | + arr = pa.array(data, type=db_dtypes.JSONArrowType()) |
144 | 149 |
|
145 | 150 | s = arr.to_pandas()
|
146 | 151 | assert isinstance(s.dtypes, db_dtypes.JSONDtype)
|
147 |
| - assert pd.isna(s[0]) |
148 |
| - assert s[1] == 0 |
149 |
| - assert s[2] == "str" |
150 |
| - assert s[3]["b"] == 2 |
151 |
| - assert s[4]["a"] == [1, 2, 3] |
| 152 | + assert s[0] |
| 153 | + assert s[1] == 100 |
| 154 | + assert math.isclose(s[2], 0.98) |
| 155 | + assert s[3] == "hello world" |
| 156 | + assert math.isclose(s[4][0], 0.1) |
| 157 | + assert math.isclose(s[4][1], 0.2) |
| 158 | + assert s[5] == { |
| 159 | + "null_field": None, |
| 160 | + "order": { |
| 161 | + "items": ["book", "pen", "computer"], |
| 162 | + "total": 15, |
| 163 | + "address": {"street": "123 Main St", "city": "Anytown"}, |
| 164 | + }, |
| 165 | + } |
| 166 | + assert pd.isna(s[6]) |
| 167 | + |
| 168 | + |
| 169 | +def test_json_arrow_to_pylist(): |
| 170 | + data = [ |
| 171 | + json.dumps(value, sort_keys=True, separators=(",", ":")) |
| 172 | + for value in JSON_DATA.values() |
| 173 | + ] |
| 174 | + arr = pa.array(data, type=db_dtypes.JSONArrowType()) |
| 175 | + |
| 176 | + s = arr.to_pylist() |
| 177 | + assert isinstance(s, list) |
| 178 | + assert s[0] |
| 179 | + assert s[1] == 100 |
| 180 | + assert math.isclose(s[2], 0.98) |
| 181 | + assert s[3] == "hello world" |
| 182 | + assert math.isclose(s[4][0], 0.1) |
| 183 | + assert math.isclose(s[4][1], 0.2) |
| 184 | + assert s[5] == { |
| 185 | + "null_field": None, |
| 186 | + "order": { |
| 187 | + "items": ["book", "pen", "computer"], |
| 188 | + "total": 15, |
| 189 | + "address": {"street": "123 Main St", "city": "Anytown"}, |
| 190 | + }, |
| 191 | + } |
| 192 | + assert s[6] is None |
| 193 | + |
| 194 | + |
| 195 | +def test_json_arrow_record_batch(): |
| 196 | + data = [ |
| 197 | + json.dumps(value, sort_keys=True, separators=(",", ":")) |
| 198 | + for value in JSON_DATA.values() |
| 199 | + ] |
| 200 | + arr = pa.array(data, type=db_dtypes.JSONArrowType()) |
| 201 | + batch = pa.RecordBatch.from_arrays([arr], ["json_col"]) |
| 202 | + sink = pa.BufferOutputStream() |
| 203 | + |
| 204 | + with pa.RecordBatchStreamWriter(sink, batch.schema) as writer: |
| 205 | + writer.write_batch(batch) |
| 206 | + |
| 207 | + buf = sink.getvalue() |
| 208 | + |
| 209 | + with pa.ipc.open_stream(buf) as reader: |
| 210 | + result = reader.read_all() |
| 211 | + |
| 212 | + json_col = result.column("json_col") |
| 213 | + assert isinstance(json_col.type, db_dtypes.JSONArrowType) |
| 214 | + |
| 215 | + s = json_col.to_pylist() |
| 216 | + |
| 217 | + assert isinstance(s, list) |
| 218 | + assert s[0] |
| 219 | + assert s[1] == 100 |
| 220 | + assert math.isclose(s[2], 0.98) |
| 221 | + assert s[3] == "hello world" |
| 222 | + assert math.isclose(s[4][0], 0.1) |
| 223 | + assert math.isclose(s[4][1], 0.2) |
| 224 | + assert s[5] == { |
| 225 | + "null_field": None, |
| 226 | + "order": { |
| 227 | + "items": ["book", "pen", "computer"], |
| 228 | + "total": 15, |
| 229 | + "address": {"street": "123 Main St", "city": "Anytown"}, |
| 230 | + }, |
| 231 | + } |
| 232 | + assert s[6] is None |
0 commit comments