|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 |
| -from collections import defaultdict |
4 | 3 | import itertools
|
5 | 4 | from typing import (
|
6 | 5 | Any,
|
7 | 6 | Callable,
|
8 |
| - DefaultDict, |
9 | 7 | Hashable,
|
10 | 8 | Sequence,
|
11 | 9 | TypeVar,
|
|
67 | 65 | )
|
68 | 66 | from pandas.core.internals.blocks import (
|
69 | 67 | Block,
|
70 |
| - CategoricalBlock, |
71 | 68 | DatetimeTZBlock,
|
72 |
| - ExtensionBlock, |
73 | 69 | ensure_block_shape,
|
74 | 70 | extend_blocks,
|
75 | 71 | get_block_type,
|
@@ -1863,63 +1859,56 @@ def construction_error(
|
1863 | 1859 | # -----------------------------------------------------------------------
|
1864 | 1860 |
|
1865 | 1861 |
|
1866 |
| -def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]: |
1867 |
| - |
1868 |
| - items_dict: DefaultDict[str, list] = defaultdict(list) |
1869 |
| - |
1870 |
| - for i, name_idx in enumerate(range(len(arrays))): |
| 1862 | +def _grouping_func(tup): |
| 1863 | + # compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype |
| 1864 | + # raises instead of returning False. Once earlier numpy versions are dropped, |
| 1865 | + # this can be simplified to `return tup[1].dtype` |
| 1866 | + dtype = tup[1].dtype |
| 1867 | + return isinstance(dtype, np.dtype), dtype |
1871 | 1868 |
|
1872 |
| - v = arrays[name_idx] |
1873 | 1869 |
|
1874 |
| - block_type = get_block_type(v) |
1875 |
| - items_dict[block_type.__name__].append((i, v)) |
| 1870 | +def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]: |
| 1871 | + tuples = list(enumerate(arrays)) |
1876 | 1872 |
|
1877 |
| - blocks: list[Block] = [] |
1878 |
| - if len(items_dict["NumericBlock"]): |
1879 |
| - numeric_blocks = multi_blockify( |
1880 |
| - items_dict["NumericBlock"], consolidate=consolidate |
1881 |
| - ) |
1882 |
| - blocks.extend(numeric_blocks) |
| 1873 | + if not consolidate: |
| 1874 | + nbs = _tuples_to_blocks_no_consolidate(tuples, dtype=None) |
| 1875 | + return nbs |
1883 | 1876 |
|
1884 |
| - if len(items_dict["DatetimeLikeBlock"]): |
1885 |
| - dtlike_blocks = multi_blockify( |
1886 |
| - items_dict["DatetimeLikeBlock"], consolidate=consolidate |
1887 |
| - ) |
1888 |
| - blocks.extend(dtlike_blocks) |
| 1877 | + # group by dtype |
| 1878 | + grouper = itertools.groupby(tuples, _grouping_func) |
1889 | 1879 |
|
1890 |
| - if len(items_dict["DatetimeTZBlock"]): |
1891 |
| - dttz_blocks = [ |
1892 |
| - DatetimeTZBlock( |
1893 |
| - ensure_block_shape(extract_array(array), 2), |
1894 |
| - placement=BlockPlacement(i), |
1895 |
| - ndim=2, |
1896 |
| - ) |
1897 |
| - for i, array in items_dict["DatetimeTZBlock"] |
1898 |
| - ] |
1899 |
| - blocks.extend(dttz_blocks) |
| 1880 | + nbs = [] |
| 1881 | + for (_, dtype), tup_block in grouper: |
| 1882 | + block_type = get_block_type(None, dtype) |
1900 | 1883 |
|
1901 |
| - if len(items_dict["ObjectBlock"]) > 0: |
1902 |
| - object_blocks = simple_blockify( |
1903 |
| - items_dict["ObjectBlock"], np.object_, consolidate=consolidate |
1904 |
| - ) |
1905 |
| - blocks.extend(object_blocks) |
| 1884 | + if isinstance(dtype, np.dtype): |
| 1885 | + is_dtlike = dtype.kind in ["m", "M"] |
1906 | 1886 |
|
1907 |
| - if len(items_dict["CategoricalBlock"]) > 0: |
1908 |
| - cat_blocks = [ |
1909 |
| - CategoricalBlock(array, placement=BlockPlacement(i), ndim=2) |
1910 |
| - for i, array in items_dict["CategoricalBlock"] |
1911 |
| - ] |
1912 |
| - blocks.extend(cat_blocks) |
| 1887 | + if issubclass(dtype.type, (str, bytes)): |
| 1888 | + dtype = np.dtype(object) |
1913 | 1889 |
|
1914 |
| - if len(items_dict["ExtensionBlock"]): |
1915 |
| - external_blocks = [ |
1916 |
| - ExtensionBlock(array, placement=BlockPlacement(i), ndim=2) |
1917 |
| - for i, array in items_dict["ExtensionBlock"] |
1918 |
| - ] |
| 1890 | + values, placement = _stack_arrays(list(tup_block), dtype) |
| 1891 | + if is_dtlike: |
| 1892 | + values = ensure_wrapped_if_datetimelike(values) |
| 1893 | + blk = block_type(values, placement=BlockPlacement(placement), ndim=2) |
| 1894 | + nbs.append(blk) |
1919 | 1895 |
|
1920 |
| - blocks.extend(external_blocks) |
| 1896 | + elif is_1d_only_ea_dtype(dtype): |
| 1897 | + dtype_blocks = [ |
| 1898 | + block_type(x[1], placement=BlockPlacement(x[0]), ndim=2) |
| 1899 | + for x in tup_block |
| 1900 | + ] |
| 1901 | + nbs.extend(dtype_blocks) |
1921 | 1902 |
|
1922 |
| - return blocks |
| 1903 | + else: |
| 1904 | + dtype_blocks = [ |
| 1905 | + block_type( |
| 1906 | + ensure_block_shape(x[1], 2), placement=BlockPlacement(x[0]), ndim=2 |
| 1907 | + ) |
| 1908 | + for x in tup_block |
| 1909 | + ] |
| 1910 | + nbs.extend(dtype_blocks) |
| 1911 | + return nbs |
1923 | 1912 |
|
1924 | 1913 |
|
1925 | 1914 | def simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
|
@@ -1970,11 +1959,16 @@ def _tuples_to_blocks_no_consolidate(tuples, dtype: DtypeObj | None) -> list[Blo
|
1970 | 1959 | if dtype is not None:
|
1971 | 1960 | return [
|
1972 | 1961 | new_block(
|
1973 |
| - np.atleast_2d(x[1].astype(dtype, copy=False)), placement=x[0], ndim=2 |
| 1962 | + ensure_block_shape(x[1].astype(dtype, copy=False), ndim=2), |
| 1963 | + placement=x[0], |
| 1964 | + ndim=2, |
1974 | 1965 | )
|
1975 | 1966 | for x in tuples
|
1976 | 1967 | ]
|
1977 |
| - return [new_block(np.atleast_2d(x[1]), placement=x[0], ndim=2) for x in tuples] |
| 1968 | + return [ |
| 1969 | + new_block(ensure_block_shape(x[1], ndim=2), placement=x[0], ndim=2) |
| 1970 | + for x in tuples |
| 1971 | + ] |
1978 | 1972 |
|
1979 | 1973 |
|
1980 | 1974 | def _stack_arrays(tuples, dtype: np.dtype):
|
|
0 commit comments