Skip to content

Commit 009c893

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into cln-pytables-sigs
2 parents e40f054 + e28ebe3 commit 009c893

File tree

229 files changed

+3636
-10540
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

229 files changed

+3636
-10540
lines changed

LICENSES/MSGPACK_LICENSE

Lines changed: 0 additions & 13 deletions
This file was deleted.

LICENSES/MSGPACK_NUMPY_LICENSE

Lines changed: 0 additions & 33 deletions
This file was deleted.

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ global-exclude *.gz
2020
global-exclude *.h5
2121
global-exclude *.html
2222
global-exclude *.json
23-
global-exclude *.msgpack
2423
global-exclude *.pickle
2524
global-exclude *.png
2625
global-exclude *.pyc

asv_bench/benchmarks/array.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class BooleanArray:
7+
def setup(self):
8+
self.values_bool = np.array([True, False, True, False])
9+
self.values_float = np.array([1.0, 0.0, 1.0, 0.0])
10+
self.values_integer = np.array([1, 0, 1, 0])
11+
self.values_integer_like = [1, 0, 1, 0]
12+
13+
def time_from_bool_array(self):
14+
pd.array(self.values_bool, dtype="boolean")
15+
16+
def time_from_integer_array(self):
17+
pd.array(self.values_integer, dtype="boolean")
18+
19+
def time_from_integer_like(self):
20+
pd.array(self.values_integer_like, dtype="boolean")
21+
22+
def time_from_float_array(self):
23+
pd.array(self.values_float, dtype="boolean")

asv_bench/benchmarks/boolean.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class TimeLogicalOps:
7+
def setup(self):
8+
N = 10_000
9+
left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool")
10+
self.left = pd.arrays.BooleanArray(left, lmask)
11+
self.right = pd.arrays.BooleanArray(right, rmask)
12+
13+
def time_or_scalar(self):
14+
self.left | True
15+
self.left | False
16+
17+
def time_or_array(self):
18+
self.left | self.right
19+
20+
def time_and_scalar(self):
21+
self.left & True
22+
self.left & False
23+
24+
def time_and_array(self):
25+
self.left & self.right
26+
27+
def time_xor_scalar(self):
28+
self.left ^ True
29+
self.left ^ False
30+
31+
def time_xor_array(self):
32+
self.left ^ self.right

asv_bench/benchmarks/frame_ctor.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,16 @@ def time_frame_from_lists(self):
105105
self.df = DataFrame(self.data)
106106

107107

108+
class FromRange:
109+
110+
goal_time = 0.2
111+
112+
def setup(self):
113+
N = 1_000_000
114+
self.data = range(N)
115+
116+
def time_frame_from_range(self):
117+
self.df = DataFrame(self.data)
118+
119+
108120
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/frame_methods.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,9 @@ class Dropna:
321321

322322
def setup(self, how, axis):
323323
self.df = DataFrame(np.random.randn(10000, 1000))
324-
with warnings.catch_warnings(record=True):
325-
self.df.ix[50:1000, 20:50] = np.nan
326-
self.df.ix[2000:3000] = np.nan
327-
self.df.ix[:, 60:70] = np.nan
324+
self.df.iloc[50:1000, 20:50] = np.nan
325+
self.df.iloc[2000:3000] = np.nan
326+
self.df.iloc[:, 60:70] = np.nan
328327
self.df_mixed = self.df.copy()
329328
self.df_mixed["foo"] = "bar"
330329

@@ -342,10 +341,9 @@ class Count:
342341

343342
def setup(self, axis):
344343
self.df = DataFrame(np.random.randn(10000, 1000))
345-
with warnings.catch_warnings(record=True):
346-
self.df.ix[50:1000, 20:50] = np.nan
347-
self.df.ix[2000:3000] = np.nan
348-
self.df.ix[:, 60:70] = np.nan
344+
self.df.iloc[50:1000, 20:50] = np.nan
345+
self.df.iloc[2000:3000] = np.nan
346+
self.df.iloc[:, 60:70] = np.nan
349347
self.df_mixed = self.df.copy()
350348
self.df_mixed["foo"] = "bar"
351349

asv_bench/benchmarks/indexing.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,6 @@ def time_iloc_scalar(self, index, index_structure):
6767
def time_iloc_slice(self, index, index_structure):
6868
self.data.iloc[:800000]
6969

70-
def time_ix_array(self, index, index_structure):
71-
with warnings.catch_warnings(record=True):
72-
self.data.ix[self.array]
73-
74-
def time_ix_list_like(self, index, index_structure):
75-
with warnings.catch_warnings(record=True):
76-
self.data.ix[[800000]]
77-
78-
def time_ix_scalar(self, index, index_structure):
79-
with warnings.catch_warnings(record=True):
80-
self.data.ix[800000]
81-
82-
def time_ix_slice(self, index, index_structure):
83-
with warnings.catch_warnings(record=True):
84-
self.data.ix[:800000]
85-
8670
def time_loc_array(self, index, index_structure):
8771
self.data.loc[self.array]
8872

@@ -148,10 +132,6 @@ def setup(self):
148132
self.bool_indexer = self.df[self.col_scalar] > 0
149133
self.bool_obj_indexer = self.bool_indexer.astype(object)
150134

151-
def time_ix(self):
152-
with warnings.catch_warnings(record=True):
153-
self.df.ix[self.idx_scalar, self.col_scalar]
154-
155135
def time_loc(self):
156136
self.df.loc[self.idx_scalar, self.col_scalar]
157137

@@ -228,14 +208,6 @@ def setup(self):
228208
self.idx = IndexSlice[20000:30000, 20:30, 35:45, 30000:40000]
229209
self.mdt = self.mdt.set_index(["A", "B", "C", "D"]).sort_index()
230210

231-
def time_series_ix(self):
232-
with warnings.catch_warnings(record=True):
233-
self.s.ix[999]
234-
235-
def time_frame_ix(self):
236-
with warnings.catch_warnings(record=True):
237-
self.df.ix[999]
238-
239211
def time_index_slice(self):
240212
self.mdt.loc[self.idx, :]
241213

@@ -310,10 +282,6 @@ def setup_cache(self):
310282
def time_lookup_iloc(self, s):
311283
s.iloc
312284

313-
def time_lookup_ix(self, s):
314-
with warnings.catch_warnings(record=True):
315-
s.ix
316-
317285
def time_lookup_loc(self, s):
318286
s.loc
319287

asv_bench/benchmarks/io/msgpack.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

asv_bench/benchmarks/io/sas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,5 @@ def setup(self, format):
2626
]
2727
self.f = os.path.join(*paths)
2828

29-
def time_read_msgpack(self, format):
29+
def time_read_sas(self, format):
3030
read_sas(self.f, format=format)

ci/code_checks.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
9494

9595
# We don't lint all C files because we don't want to lint any that are built
9696
# from Cython files nor do we want to lint C files that we didn't modify for
97-
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
97+
# this particular codebase (e.g. src/headers, src/klib). However,
9898
# we can lint all header files since they aren't "generated" like C files are.
9999
MSG='Linting .c and .h' ; echo $MSG
100-
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/io/msgpack pandas/_libs/*.cpp pandas/util
100+
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
101101
RET=$(($RET + $?)) ; echo $MSG "DONE"
102102

103103
echo "isort --version-number"
@@ -281,6 +281,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
281281
pytest -q --doctest-modules pandas/core/arrays/string_.py
282282
RET=$(($RET + $?)) ; echo $MSG "DONE"
283283

284+
MSG='Doctests arrays/boolean.py' ; echo $MSG
285+
pytest -q --doctest-modules pandas/core/arrays/boolean.py
286+
RET=$(($RET + $?)) ; echo $MSG "DONE"
287+
284288
fi
285289

286290
### DOCSTRINGS ###

ci/deps/azure-36-locale_slow.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- lxml
1919
- matplotlib=2.2.2
2020
- numpy=1.14.*
21-
- openpyxl=2.4.8
21+
- openpyxl=2.5.7
2222
- python-dateutil
2323
- python-blosc
2424
- pytz=2017.2

ci/deps/azure-36-minimum_versions.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@ dependencies:
1111
- pytest-xdist>=1.21
1212
- hypothesis>=3.58.0
1313
- pytest-azurepipelines
14+
- psutil
1415

1516
# pandas dependencies
1617
- beautifulsoup4=4.6.0
1718
- bottleneck=1.2.1
1819
- jinja2=2.8
1920
- numexpr=2.6.2
2021
- numpy=1.13.3
21-
- openpyxl=2.4.8
22+
- openpyxl=2.5.7
2223
- pytables=3.4.2
2324
- python-dateutil=2.6.1
2425
- pytz=2017.2

doc/redirects.csv

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,6 @@ generated/pandas.DataFrame.to_hdf,../reference/api/pandas.DataFrame.to_hdf
491491
generated/pandas.DataFrame.to,../reference/api/pandas.DataFrame.to
492492
generated/pandas.DataFrame.to_json,../reference/api/pandas.DataFrame.to_json
493493
generated/pandas.DataFrame.to_latex,../reference/api/pandas.DataFrame.to_latex
494-
generated/pandas.DataFrame.to_msgpack,../reference/api/pandas.DataFrame.to_msgpack
495494
generated/pandas.DataFrame.to_numpy,../reference/api/pandas.DataFrame.to_numpy
496495
generated/pandas.DataFrame.to_panel,../reference/api/pandas.DataFrame.to_panel
497496
generated/pandas.DataFrame.to_parquet,../reference/api/pandas.DataFrame.to_parquet
@@ -618,7 +617,6 @@ generated/pandas.Index.asi8,../reference/api/pandas.Index.asi8
618617
generated/pandas.Index.asof,../reference/api/pandas.Index.asof
619618
generated/pandas.Index.asof_locs,../reference/api/pandas.Index.asof_locs
620619
generated/pandas.Index.astype,../reference/api/pandas.Index.astype
621-
generated/pandas.Index.contains,../reference/api/pandas.Index.contains
622620
generated/pandas.Index.copy,../reference/api/pandas.Index.copy
623621
generated/pandas.Index.data,../reference/api/pandas.Index.data
624622
generated/pandas.Index.delete,../reference/api/pandas.Index.delete
@@ -780,7 +778,6 @@ generated/pandas.io.formats.style.Styler.use,../reference/api/pandas.io.formats.
780778
generated/pandas.io.formats.style.Styler.where,../reference/api/pandas.io.formats.style.Styler.where
781779
generated/pandas.io.json.build_table_schema,../reference/api/pandas.io.json.build_table_schema
782780
generated/pandas.io.json.json_normalize,../reference/api/pandas.io.json.json_normalize
783-
generated/pandas.io.stata.StataReader.data,../reference/api/pandas.io.stata.StataReader.data
784781
generated/pandas.io.stata.StataReader.data_label,../reference/api/pandas.io.stata.StataReader.data_label
785782
generated/pandas.io.stata.StataReader.value_labels,../reference/api/pandas.io.stata.StataReader.value_labels
786783
generated/pandas.io.stata.StataReader.variable_labels,../reference/api/pandas.io.stata.StataReader.variable_labels
@@ -891,7 +888,6 @@ generated/pandas.read_gbq,../reference/api/pandas.read_gbq
891888
generated/pandas.read_hdf,../reference/api/pandas.read_hdf
892889
generated/pandas.read,../reference/api/pandas.read
893890
generated/pandas.read_json,../reference/api/pandas.read_json
894-
generated/pandas.read_msgpack,../reference/api/pandas.read_msgpack
895891
generated/pandas.read_parquet,../reference/api/pandas.read_parquet
896892
generated/pandas.read_pickle,../reference/api/pandas.read_pickle
897893
generated/pandas.read_sas,../reference/api/pandas.read_sas
@@ -1232,7 +1228,6 @@ generated/pandas.Series.to_json,../reference/api/pandas.Series.to_json
12321228
generated/pandas.Series.to_latex,../reference/api/pandas.Series.to_latex
12331229
generated/pandas.Series.to_list,../reference/api/pandas.Series.to_list
12341230
generated/pandas.Series.tolist,../reference/api/pandas.Series.tolist
1235-
generated/pandas.Series.to_msgpack,../reference/api/pandas.Series.to_msgpack
12361231
generated/pandas.Series.to_numpy,../reference/api/pandas.Series.to_numpy
12371232
generated/pandas.Series.to_period,../reference/api/pandas.Series.to_period
12381233
generated/pandas.Series.to_pickle,../reference/api/pandas.Series.to_pickle

doc/source/development/developer.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ The ``metadata`` field is ``None`` except for:
125125
in ``BYTE_ARRAY`` Parquet columns. The encoding can be one of:
126126

127127
* ``'pickle'``
128-
* ``'msgpack'``
129128
* ``'bson'``
130129
* ``'json'``
131130

doc/source/getting_started/install.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -249,16 +249,16 @@ PyTables 3.4.2 HDF5-based reading / writing
249249
SQLAlchemy 1.1.4 SQL support for databases other than sqlite
250250
SciPy 0.19.0 Miscellaneous statistical functions
251251
XLsxWriter 0.9.8 Excel writing
252-
blosc Compression for msgpack
252+
blosc Compression for HDF5
253253
fastparquet 0.3.2 Parquet reading / writing
254254
gcsfs 0.2.2 Google Cloud Storage access
255255
html5lib HTML parser for read_html (see :ref:`note <optional_html>`)
256256
lxml 3.8.0 HTML parser for read_html (see :ref:`note <optional_html>`)
257257
matplotlib 2.2.2 Visualization
258-
openpyxl 2.4.8 Reading / writing for xlsx files
258+
openpyxl 2.5.7 Reading / writing for xlsx files
259259
pandas-gbq 0.8.0 Google Big Query access
260260
psycopg2 PostgreSQL engine for sqlalchemy
261-
pyarrow 0.12.0 Parquet and feather reading / writing
261+
pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and feather reading / writing
262262
pymysql 0.7.11 MySQL engine for sqlalchemy
263263
pyreadstat SPSS files (.sav) reading
264264
pytables 3.4.2 HDF5 reading / writing
@@ -269,7 +269,7 @@ xclip Clipboard I/O on linux
269269
xlrd 1.1.0 Excel reading
270270
xlwt 1.2.0 Excel writing
271271
xsel Clipboard I/O on linux
272-
zlib Compression for msgpack
272+
zlib Compression for HDF5
273273
========================= ================== =============================================================
274274

275275
.. _optional_html:

doc/source/index.rst.template

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ See the :ref:`overview` for more detail about what's in the library.
7373
* :doc:`user_guide/missing_data`
7474
* :doc:`user_guide/categorical`
7575
* :doc:`user_guide/integer_na`
76+
* :doc:`user_guide/boolean`
7677
* :doc:`user_guide/visualization`
7778
* :doc:`user_guide/computation`
7879
* :doc:`user_guide/groupby`

0 commit comments

Comments
 (0)