Skip to content

API: pseudo-public internals API for downstream libraries #40182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 5, 2021
4 changes: 2 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pandas.core.internals.api import make_block # pseudo-public version
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.base import DataManager
from pandas.core.internals.blocks import ( # io.pytables, io.packers
from pandas.core.internals.blocks import ( # io.pytables
Block,
CategoricalBlock,
DatetimeBlock,
Expand All @@ -10,7 +11,6 @@
NumericBlock,
ObjectBlock,
TimeDeltaBlock,
make_block,
)
from pandas.core.internals.concat import concatenate_managers
from pandas.core.internals.managers import (
Expand Down
61 changes: 61 additions & 0 deletions pandas/core/internals/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a test for this module that asserts the exact names that we expose, similar to https://github.com/pandas-dev/pandas/blob/master/pandas/tests/api/test_api.py?

This is a pseudo-public API for downstream libraries. We ask that downstream
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would have these imported into pandas.api.internals (then you can really control the exports).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know pyarrow accesses the pd.core.internals namespace; I'm not sure about others. We can ask them to change, but for the foreseeable future we will need these in the namespace.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, what I mean is: let's expose a wrapper API namespace, and then we can change the downstream packages once we have released.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. Keep it in this file though? I'm wary of adding it to the pd.api namespace lest new downstream packages adopt bad habits.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok sure

authors

1) Try to avoid using internals directly altogether, and failing that,
2) Use only functions exposed here (or in core.internals)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally we remove option 2 as soon as possible.


"""
from typing import Optional

import numpy as np

from pandas._typing import Dtype

from pandas.core.dtypes.common import is_datetime64tz_dtype
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.generic import ABCPandasArray

from pandas.core.arrays import DatetimeArray
from pandas.core.internals.blocks import (
Block,
DatetimeTZBlock,
get_block_type,
)


def make_block(
    values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
    """
    Construct a Block; pseudo-public counterpart of blocks.make_block.

    Downstream libraries are asked to call this function instead of any
    fully-internal API, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__

    Parameters
    ----------
    values : array-like
        Data for the block. A PandasArray is converted with ``to_numpy()``.
    placement
        Forwarded unchanged to the block constructor.
    klass : type, optional
        Block class to instantiate. When None, it is chosen via
        ``get_block_type(values, dtype)``.
    ndim : int, optional
        Forwarded to the block constructor. When greater than 1, values
        unwrapped from a PandasArray are made at least 2-D.
    dtype : Dtype, optional
        A PandasDtype is replaced by its ``numpy_dtype``. When falsy,
        ``values.dtype`` is used for block-class inference instead.

    Returns
    -------
    Block
        The result of ``klass(values, ndim=ndim, placement=placement)``.
    """
    if isinstance(values, ABCPandasArray):
        # PandasArray / PandasDtype must not leak into internals; for now,
        # blocks are backed by plain ndarrays whenever possible.
        values = values.to_numpy()
        if ndim and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        # No class requested: infer it, deferring to the values' own dtype
        # whenever the caller did not supply a (truthy) dtype.
        inferred_dtype = dtype if dtype else values.dtype
        klass = get_block_type(values, inferred_dtype)
    elif klass is DatetimeTZBlock:
        if not is_datetime64tz_dtype(values.dtype):
            # TODO: This is no longer hit internally; does it need to be
            #  retained for e.g. pyarrow?
            values = DatetimeArray._simple_new(values, dtype=dtype)

    return klass(values, ndim=ndim, placement=placement)
2 changes: 1 addition & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
from pandas.core.internals import (
BlockManager,
SingleBlockManager,
make_block,
)
from pandas.core.internals.blocks import make_block # private version


@pytest.fixture
Expand Down