Skip to content

Commit df8d226

Browse files
REF: remove Block access in the JSON writing code
1 parent 11545d5 commit df8d226

File tree

6 files changed

+37
-96
lines changed

6 files changed

+37
-96
lines changed

pandas/_libs/src/ujson/python/objToJSON.c

+29 -82
Original file line number | Diff line number | Diff line change
@@ -294,7 +294,12 @@ static int is_simple_frame(PyObject *obj) {
294294
if (!mgr) {
295295
return 0;
296296
}
297-
int ret = (get_attr_length(mgr, "blocks") <= 1);
297+
int ret;
298+
if (PyObject_HasAttrString(mgr, "blocks")) {
299+
ret = (get_attr_length(mgr, "blocks") <= 1);
300+
} else {
301+
ret = 0;
302+
}
298303

299304
Py_DECREF(mgr);
300305
return ret;
@@ -656,16 +661,10 @@ void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
656661
}
657662

658663
void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
659-
PyObject *obj, *blocks, *block, *values, *tmp;
660-
PyArrayObject *locs;
664+
PyObject *obj, *values, *arrays, *array;
661665
PdBlockContext *blkCtxt;
662666
NpyArrContext *npyarr;
663667
Py_ssize_t i;
664-
NpyIter *iter;
665-
NpyIter_IterNextFunc *iternext;
666-
npy_int64 **dataptr;
667-
npy_int64 colIdx;
668-
npy_intp idx;
669668

670669
obj = (PyObject *)_obj;
671670

@@ -708,97 +707,45 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
708707
return;
709708
}
710709

711-
blocks = get_sub_attr(obj, "_mgr", "blocks");
712-
if (!blocks) {
710+
arrays = get_sub_attr(obj, "_mgr", "column_arrays");
711+
if (!arrays) {
713712
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
714713
return;
715-
} else if (!PyTuple_Check(blocks)) {
716-
PyErr_SetString(PyExc_TypeError, "blocks must be a tuple!");
717-
goto BLKRET;
718714
}
719715

720-
// force transpose so each NpyArrContext strides down its column
721-
GET_TC(tc)->transpose = 1;
722-
723-
for (i = 0; i < PyObject_Length(blocks); i++) {
724-
block = PyTuple_GET_ITEM(blocks, i);
725-
if (!block) {
716+
for (i = 0; i < PyObject_Length(arrays); i++) {
717+
array = PyList_GET_ITEM(arrays, i);
718+
if (!array) {
726719
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
727-
goto BLKRET;
720+
goto ARR_RET;
728721
}
729722

730-
tmp = PyObject_CallMethod(block, "get_block_values_for_json", NULL);
731-
if (!tmp) {
723+
// ensure we have a numpy array (i.e. np.asarray)
724+
values = PyObject_CallMethod(array, "__array__", NULL);
725+
if ((!values) || (!PyArray_CheckExact(values))) {
726+
// Didn't get a numpy array
732727
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
733728
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
734-
goto BLKRET;
735-
}
736-
737-
values = PyArray_Transpose((PyArrayObject *)tmp, NULL);
738-
Py_DECREF(tmp);
739-
if (!values) {
740-
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
741-
goto BLKRET;
742-
}
743-
744-
locs = (PyArrayObject *)get_sub_attr(block, "mgr_locs", "as_array");
745-
if (!locs) {
746-
Py_DECREF(values);
747-
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
748-
goto BLKRET;
729+
goto ARR_RET;
749730
}
750731

751-
iter = NpyIter_New(locs, NPY_ITER_READONLY, NPY_KEEPORDER,
752-
NPY_NO_CASTING, NULL);
753-
if (!iter) {
754-
Py_DECREF(values);
755-
Py_DECREF(locs);
756-
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
757-
goto BLKRET;
758-
}
759-
iternext = NpyIter_GetIterNext(iter, NULL);
760-
if (!iternext) {
761-
NpyIter_Deallocate(iter);
762-
Py_DECREF(values);
763-
Py_DECREF(locs);
764-
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
765-
goto BLKRET;
766-
}
767-
dataptr = (npy_int64 **)NpyIter_GetDataPtrArray(iter);
768-
do {
769-
colIdx = **dataptr;
770-
idx = NpyIter_GetIterIndex(iter);
732+
GET_TC(tc)->newObj = values;
771733

772-
blkCtxt->cindices[colIdx] = idx;
734+
// init a dedicated context for this column
735+
NpyArr_iterBegin(obj, tc);
736+
npyarr = GET_TC(tc)->npyarr;
773737

774-
// Reference freed in Pdblock_iterend
775-
Py_INCREF(values);
776-
GET_TC(tc)->newObj = values;
777-
778-
// init a dedicated context for this column
779-
NpyArr_iterBegin(obj, tc);
780-
npyarr = GET_TC(tc)->npyarr;
781-
782-
// set the dataptr to our desired column and initialise
783-
if (npyarr != NULL) {
784-
npyarr->dataptr += npyarr->stride * idx;
785-
NpyArr_iterNext(obj, tc);
786-
}
787-
GET_TC(tc)->itemValue = NULL;
788-
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL;
789-
790-
blkCtxt->npyCtxts[colIdx] = npyarr;
791-
GET_TC(tc)->newObj = NULL;
792-
} while (iternext(iter));
738+
GET_TC(tc)->itemValue = NULL;
739+
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL;
793740

794-
NpyIter_Deallocate(iter);
795-
Py_DECREF(values);
796-
Py_DECREF(locs);
741+
blkCtxt->npyCtxts[i] = npyarr;
742+
GET_TC(tc)->newObj = NULL;
797743
}
798744
GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0];
745+
goto ARR_RET;
799746

800-
BLKRET:
801-
Py_DECREF(blocks);
747+
ARR_RET:
748+
Py_DECREF(arrays);
802749
}
803750

804751
void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) {

pandas/core/internals/array_manager.py

+4
Original file line number | Diff line number | Diff line change
@@ -919,6 +919,10 @@ def iget_values(self, i: int) -> ArrayLike:
919919
"""
920920
return self.arrays[i]
921921

922+
@property
923+
def column_arrays(self) -> list[ArrayLike]:
924+
return self.arrays
925+
922926
def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
923927
"""
924928
Set new column(s).

pandas/core/internals/blocks.py

-8
Original file line number | Diff line number | Diff line change
@@ -224,14 +224,6 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
224224
# expected "ndarray")
225225
return self.values # type: ignore[return-value]
226226

227-
@final
228-
def get_block_values_for_json(self) -> np.ndarray:
229-
"""
230-
This is used in the JSON C code.
231-
"""
232-
# TODO(EA2D): reshape will be unnecessary with 2D EAs
233-
return np.asarray(self.values).reshape(self.shape)
234-
235227
@final
236228
@cache_readonly
237229
def fill_value(self):

pandas/core/internals/managers.py

+4
Original file line number | Diff line number | Diff line change
@@ -1137,6 +1137,10 @@ def iget_values(self, i: int) -> ArrayLike:
11371137
values = block.iget(self.blklocs[i])
11381138
return values
11391139

1140+
@property
1141+
def column_arrays(self) -> list[ArrayLike]:
1142+
return [self.iget_values(i) for i in range(len(self.items))]
1143+
11401144
def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
11411145
"""
11421146
Set new item in-place. Does not consolidate. Adds new Block if not

pandas/tests/io/json/test_json_table_schema.py

-4
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
import pandas.util._test_decorators as td
10-
119
from pandas.core.dtypes.dtypes import (
1210
CategoricalDtype,
1311
DatetimeTZDtype,
@@ -26,8 +24,6 @@
2624
set_default_names,
2725
)
2826

29-
pytestmark = td.skip_array_manager_not_yet_implemented
30-
3127

3228
class TestBuildSchema:
3329
def setup_method(self, method):

pandas/tests/io/json/test_pandas.py

-2
Original file line number | Diff line number | Diff line change
@@ -857,8 +857,6 @@ def test_convert_dates_infer(self, infer_word):
857857
result = read_json(dumps(data))[["id", infer_word]]
858858
tm.assert_frame_equal(result, expected)
859859

860-
# TODO(ArrayManager) JSON
861-
@td.skip_array_manager_not_yet_implemented
862860
@pytest.mark.parametrize(
863861
"date,date_unit",
864862
[

0 commit comments

Comments
 (0)