# pandas/__init__.py

# flake8: noqa

# Markup format of this package's docstrings, declared through the
# ``__docformat__`` module attribute convention (PEP 258).
__docformat__ = 'restructuredtext'

# Let users know if they're missing any of our hard dependencies.
#
# Every hard dependency is imported up front so that all failures are
# collected and reported together in a single ImportError, instead of
# surfacing one at a time from deep inside later imports.
hard_dependencies = ("numpy", "pytz", "dateutil")
missing_dependencies = []

for dependency in hard_dependencies:
    try:
        __import__(dependency)
    except ImportError as e:
        # Keep the underlying reason next to the name: a package that is
        # installed but fails to import also raises ImportError, and the
        # bare name alone would wrongly suggest it is simply not installed.
        missing_dependencies.append("{0}: {1}".format(dependency, e))

if missing_dependencies:
    raise ImportError(
        "Unable to import required dependencies:\n" +
        "\n".join(missing_dependencies))

# Remove the helper names so they do not leak into the package namespace.
del hard_dependencies, dependency, missing_dependencies

# numpy compat
from pandas.compat.numpy import (
    _np_version_under1p14, _np_version_under1p15, _np_version_under1p16,
    _np_version_under1p17)

# Import the compiled extension modules early so that an unbuilt source
# checkout fails here with build instructions rather than with an obscure
# error from a later import.
try:
    from pandas._libs import (hashtable as _hashtable,
                             lib as _lib,
                             tslib as _tslib)
except ImportError as e:  # pragma: no cover
    # hack but overkill to use re: strip the fixed prefix of the ImportError
    # text so that what remains identifies the extension that failed
    module = str(e).replace('cannot import name ', '')
    # Chain explicitly so the traceback shows the original ImportError as the
    # direct cause of this more descriptive one.
    raise ImportError("C extension: {0} not built. If you want to import "
                      "pandas from the source directory, you may need to run "
                      "'python setup.py build_ext --inplace --force' to build "
                      "the C extensions first.".format(module)) from e

from datetime import datetime

from pandas._config import (get_option, set_option, reset_option,
                            describe_option, option_context, options)

# let init-time option registration happen
import pandas.core.config_init

from pandas.core.api import (
    # dtype
    Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype,
    UInt16Dtype, UInt32Dtype, UInt64Dtype, CategoricalDtype,
    PeriodDtype, IntervalDtype, DatetimeTZDtype,

    # missing
    isna, isnull, notna, notnull,

    # indexes
    Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex,
    Float64Index, MultiIndex, IntervalIndex, TimedeltaIndex,
    DatetimeIndex, PeriodIndex, IndexSlice,

    # tseries
    NaT, Period, period_range, Timedelta, timedelta_range,
    Timestamp, date_range, bdate_range, Interval, interval_range,
    DateOffset,

    # conversion
    to_numeric, to_datetime, to_timedelta,

    # misc
    np, NamedAgg, TimeGrouper, Grouper, factorize, unique, value_counts,
    array, Categorical, set_eng_float_format, Series, DataFrame,
    Panel)

from pandas.core.sparse.api import (
    SparseArray, SparseDataFrame, SparseSeries, SparseDtype)

from pandas.tseries.api import infer_freq
from pandas.tseries import offsets

from pandas.core.computation.api import eval

from pandas.core.reshape.api import (
    concat, lreshape, melt, wide_to_long, merge, merge_asof,
    merge_ordered, crosstab, pivot, pivot_table, get_dummies,
    cut, qcut)

from pandas.util._print_versions import show_versions

from pandas.io.api import (
    # excel
    ExcelFile, ExcelWriter, read_excel,

    # packers
    read_msgpack, to_msgpack,

    # parsers
    read_csv, read_fwf, read_table,

    # pickle
    read_pickle, to_pickle,

    # pytables
    HDFStore, read_hdf,

    # sql
    read_sql, read_sql_query,
    read_sql_table,

    # misc
    read_clipboard, read_parquet, read_feather, read_gbq,
    read_html, read_json, read_stata, read_sas)

from pandas.util._tester import test
import pandas.testing
import pandas.arrays

# Publish version metadata: prefer the 'closest-tag' entry when it is
# present, otherwise fall back to the 'version' entry.
from ._version import get_versions
_version_info = get_versions()
_default_version = _version_info['version']
__version__ = _version_info.get('closest-tag', _default_version)
__git_version__ = _version_info.get('full-revisionid')
# Drop the temporaries so only the dunder attributes remain on the package.
del get_versions, _version_info, _default_version

# module level doc-string
# Bound by explicit assignment to ``__doc__`` at this point, after the imports
# above, rather than written as a leading string literal at the top of the
# file.
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================

**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.

Main Features
-------------
Here are just a few of the things that pandas does well:

  - Easy handling of missing data in floating point as well as non-floating
    point data.
  - Size mutability: columns can be inserted and deleted from DataFrame and
    higher dimensional objects
  - Automatic and explicit data alignment: objects can be explicitly aligned
    to a set of labels, or the user can simply ignore the labels and let
    `Series`, `DataFrame`, etc. automatically align the data for you in
    computations.
  - Powerful, flexible group by functionality to perform split-apply-combine
    operations on data sets, for both aggregating and transforming data.
  - Make it easy to convert ragged, differently-indexed data in other Python
    and NumPy data structures into DataFrame objects.
  - Intelligent label-based slicing, fancy indexing, and subsetting of large
    data sets.
  - Intuitive merging and joining data sets.
  - Flexible reshaping and pivoting of data sets.
  - Hierarchical labeling of axes (possible to have multiple labels per tick).
  - Robust IO tools for loading data from flat files (CSV and delimited),
    Excel files, databases, and saving/loading data from the ultrafast HDF5
    format.
  - Time series-specific functionality: date range generation and frequency
    conversion, moving window statistics, moving window linear regressions,
    date shifting and lagging, etc.
"""